{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 500,
  "global_step": 21000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.761904761904762e-08, |
|
"loss": 30.681, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5e-06, |
|
"loss": 29.2188, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1e-05, |
|
"loss": 27.9484, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.5e-05, |
|
"loss": 25.4971, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2e-05, |
|
"loss": 21.1988, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5e-05, |
|
"loss": 15.2033, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3e-05, |
|
"loss": 7.4762, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.8812, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4e-05, |
|
"loss": 0.5258, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.347, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2779, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.33707377314567566, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.804, |
|
"eval_samples_per_second": 436.567, |
|
"eval_steps_per_second": 13.682, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 0.2162, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6e-05, |
|
"loss": 0.1397, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 0.1035, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7e-05, |
|
"loss": 0.0765, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.035, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8e-05, |
|
"loss": 0.0481, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.5e-05, |
|
"loss": 0.0285, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9e-05, |
|
"loss": 0.0305, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.5e-05, |
|
"loss": 0.034, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0346, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.01727573201060295, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9791, |
|
"eval_samples_per_second": 358.485, |
|
"eval_steps_per_second": 11.235, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.944444444444446e-05, |
|
"loss": 0.0147, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 9.888888888888889e-05, |
|
"loss": 0.0125, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 9.833333333333333e-05, |
|
"loss": 0.0139, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.777777777777778e-05, |
|
"loss": 0.0129, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.722222222222223e-05, |
|
"loss": 0.0086, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.666666666666667e-05, |
|
"loss": 0.0118, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.611111111111112e-05, |
|
"loss": 0.0198, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 9.555555555555557e-05, |
|
"loss": 0.0082, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.5e-05, |
|
"loss": 0.0074, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.444444444444444e-05, |
|
"loss": 0.0072, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.007698288187384605, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.773, |
|
"eval_samples_per_second": 454.074, |
|
"eval_steps_per_second": 14.23, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.388888888888889e-05, |
|
"loss": 0.0146, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 0.0072, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 9.277777777777778e-05, |
|
"loss": 0.0065, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 9.222222222222223e-05, |
|
"loss": 0.0092, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 9.166666666666667e-05, |
|
"loss": 0.0065, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.111111111111112e-05, |
|
"loss": 0.0083, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 9.055555555555556e-05, |
|
"loss": 0.009, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 9e-05, |
|
"loss": 0.0027, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 8.944444444444446e-05, |
|
"loss": 0.0073, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 0.0057, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.009315615519881248, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.7737, |
|
"eval_samples_per_second": 453.665, |
|
"eval_steps_per_second": 14.217, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 8.833333333333333e-05, |
|
"loss": 0.0072, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 8.777777777777778e-05, |
|
"loss": 0.0072, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 8.722222222222223e-05, |
|
"loss": 0.0109, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 8.666666666666667e-05, |
|
"loss": 0.01, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 8.611111111111112e-05, |
|
"loss": 0.0013, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.555555555555556e-05, |
|
"loss": 0.017, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 8.5e-05, |
|
"loss": 0.0061, |
|
"step": 4935 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 8.444444444444444e-05, |
|
"loss": 0.0078, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 8.38888888888889e-05, |
|
"loss": 0.0028, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.005, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.005270855501294136, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.584, |
|
"eval_samples_per_second": 601.029, |
|
"eval_steps_per_second": 18.836, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 8.277777777777778e-05, |
|
"loss": 0.006, |
|
"step": 5355 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 8.222222222222222e-05, |
|
"loss": 0.0068, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 8.166666666666667e-05, |
|
"loss": 0.0029, |
|
"step": 5565 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 8.111111111111112e-05, |
|
"loss": 0.0016, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 8.055555555555556e-05, |
|
"loss": 0.0055, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 8e-05, |
|
"loss": 0.0029, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 7.944444444444444e-05, |
|
"loss": 0.006, |
|
"step": 5985 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 7.88888888888889e-05, |
|
"loss": 0.0028, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 7.833333333333333e-05, |
|
"loss": 0.0067, |
|
"step": 6195 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 7.777777777777778e-05, |
|
"loss": 0.002, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.005636307876557112, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.5895, |
|
"eval_samples_per_second": 595.407, |
|
"eval_steps_per_second": 18.659, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 7.722222222222223e-05, |
|
"loss": 0.0003, |
|
"step": 6405 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 7.666666666666667e-05, |
|
"loss": 0.002, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 7.61111111111111e-05, |
|
"loss": 0.0011, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 7.555555555555556e-05, |
|
"loss": 0.0028, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.0062, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 7.444444444444444e-05, |
|
"loss": 0.0006, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 7.38888888888889e-05, |
|
"loss": 0.003, |
|
"step": 7035 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 7.333333333333333e-05, |
|
"loss": 0.0048, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 7.277777777777778e-05, |
|
"loss": 0.0038, |
|
"step": 7245 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 7.222222222222222e-05, |
|
"loss": 0.002, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.009145626798272133, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.605, |
|
"eval_samples_per_second": 580.167, |
|
"eval_steps_per_second": 18.182, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 7.166666666666667e-05, |
|
"loss": 0.005, |
|
"step": 7455 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 7.111111111111112e-05, |
|
"loss": 0.0018, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 7.055555555555556e-05, |
|
"loss": 0.0031, |
|
"step": 7665 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 7e-05, |
|
"loss": 0.0027, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 6.944444444444444e-05, |
|
"loss": 0.0011, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 6.88888888888889e-05, |
|
"loss": 0.0032, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 6.833333333333333e-05, |
|
"loss": 0.0018, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 6.777777777777778e-05, |
|
"loss": 0.0032, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 6.722222222222223e-05, |
|
"loss": 0.0045, |
|
"step": 8295 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0018, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.0010060666827484965, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.618, |
|
"eval_samples_per_second": 567.961, |
|
"eval_steps_per_second": 17.799, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 6.611111111111111e-05, |
|
"loss": 0.003, |
|
"step": 8505 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 6.555555555555556e-05, |
|
"loss": 0.0023, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 0.0028, |
|
"step": 8715 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 6.444444444444446e-05, |
|
"loss": 0.002, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 6.388888888888888e-05, |
|
"loss": 0.0012, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 6.333333333333333e-05, |
|
"loss": 0.0013, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 6.277777777777778e-05, |
|
"loss": 0.0011, |
|
"step": 9135 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 6.222222222222222e-05, |
|
"loss": 0.0008, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 6.166666666666667e-05, |
|
"loss": 0.0029, |
|
"step": 9345 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 6.111111111111112e-05, |
|
"loss": 0.0044, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.004300011787563562, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.7083, |
|
"eval_samples_per_second": 495.526, |
|
"eval_steps_per_second": 15.529, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 6.055555555555555e-05, |
|
"loss": 0.0012, |
|
"step": 9555 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 6e-05, |
|
"loss": 0.002, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 5.9444444444444445e-05, |
|
"loss": 0.0017, |
|
"step": 9765 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 5.8888888888888896e-05, |
|
"loss": 0.0002, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 5.833333333333334e-05, |
|
"loss": 0.0015, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 5.7777777777777776e-05, |
|
"loss": 0.003, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 5.722222222222222e-05, |
|
"loss": 0.0011, |
|
"step": 10185 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 5.666666666666667e-05, |
|
"loss": 0.0018, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 5.6111111111111114e-05, |
|
"loss": 0.0003, |
|
"step": 10395 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5.555555555555556e-05, |
|
"loss": 0.0024, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.004753963556140661, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.593, |
|
"eval_samples_per_second": 591.906, |
|
"eval_steps_per_second": 18.55, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 0.0004, |
|
"step": 10605 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 5.4444444444444446e-05, |
|
"loss": 0.0022, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 5.388888888888889e-05, |
|
"loss": 0.0016, |
|
"step": 10815 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 0.0008, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 5.2777777777777784e-05, |
|
"loss": 0.0002, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 5.222222222222223e-05, |
|
"loss": 0.0023, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 5.166666666666667e-05, |
|
"loss": 0.0009, |
|
"step": 11235 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 5.111111111111111e-05, |
|
"loss": 0.0036, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 5.055555555555556e-05, |
|
"loss": 0.0049, |
|
"step": 11445 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0032, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.0022743879817426205, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.656, |
|
"eval_samples_per_second": 535.061, |
|
"eval_steps_per_second": 16.768, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 4.9444444444444446e-05, |
|
"loss": 0.0036, |
|
"step": 11655 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 0.0014, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 4.8333333333333334e-05, |
|
"loss": 0.0022, |
|
"step": 11865 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 4.7777777777777784e-05, |
|
"loss": 0.0034, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.0009, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.0012, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 4.6111111111111115e-05, |
|
"loss": 0.0012, |
|
"step": 12285 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 4.555555555555556e-05, |
|
"loss": 0.0023, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.0003, |
|
"step": 12495 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.0028, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.00027711206348612905, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.5911, |
|
"eval_samples_per_second": 593.792, |
|
"eval_steps_per_second": 18.609, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 4.388888888888889e-05, |
|
"loss": 0.001, |
|
"step": 12705 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.0011, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 4.277777777777778e-05, |
|
"loss": 0.0028, |
|
"step": 12915 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 4.222222222222222e-05, |
|
"loss": 0.0009, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.0001, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 4.111111111111111e-05, |
|
"loss": 0.0005, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 4.055555555555556e-05, |
|
"loss": 0.003, |
|
"step": 13335 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 3.944444444444445e-05, |
|
"loss": 0.0038, |
|
"step": 13545 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.0005, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.0007963060052134097, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.66, |
|
"eval_samples_per_second": 531.819, |
|
"eval_steps_per_second": 16.667, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 3.8333333333333334e-05, |
|
"loss": 0.0009, |
|
"step": 13755 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 0.0004, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 3.722222222222222e-05, |
|
"loss": 0.0085, |
|
"step": 13965 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.0028, |
|
"step": 14070 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.0019, |
|
"step": 14175 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 0.0008, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0001, |
|
"step": 14385 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 3.444444444444445e-05, |
|
"loss": 0.0014, |
|
"step": 14490 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 3.388888888888889e-05, |
|
"loss": 0.002, |
|
"step": 14595 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0026, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.0011878299992531538, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.599, |
|
"eval_samples_per_second": 585.977, |
|
"eval_steps_per_second": 18.364, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 3.277777777777778e-05, |
|
"loss": 0.0016, |
|
"step": 14805 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 3.222222222222223e-05, |
|
"loss": 0.0008, |
|
"step": 14910 |
|
}, |
|
{ |
|
"epoch": 14.3, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"loss": 0.0005, |
|
"step": 15015 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 0.0005, |
|
"step": 15120 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.001, |
|
"step": 15225 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.002, |
|
"step": 15330 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 2.9444444444444448e-05, |
|
"loss": 0.0001, |
|
"step": 15435 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 0.0, |
|
"step": 15540 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"loss": 0.0, |
|
"step": 15645 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0011, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.017123280093073845, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.6147, |
|
"eval_samples_per_second": 570.998, |
|
"eval_steps_per_second": 17.895, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 2.7222222222222223e-05, |
|
"loss": 0.0001, |
|
"step": 15855 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.0031, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 2.6111111111111114e-05, |
|
"loss": 0.0001, |
|
"step": 16065 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 2.5555555555555554e-05, |
|
"loss": 0.0016, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0006, |
|
"step": 16275 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 0.0002, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"learning_rate": 2.3888888888888892e-05, |
|
"loss": 0.0008, |
|
"step": 16485 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.0016, |
|
"step": 16590 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 2.277777777777778e-05, |
|
"loss": 0.0002, |
|
"step": 16695 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.011970149353146553, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.591, |
|
"eval_samples_per_second": 593.899, |
|
"eval_steps_per_second": 18.612, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 0.0009, |
|
"step": 16905 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 2.111111111111111e-05, |
|
"loss": 0.0009, |
|
"step": 17010 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 2.0555555555555555e-05, |
|
"loss": 0.0004, |
|
"step": 17115 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0001, |
|
"step": 17220 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.0032, |
|
"step": 17325 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 0.0, |
|
"step": 17430 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"loss": 0.0002, |
|
"step": 17535 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 1.777777777777778e-05, |
|
"loss": 0.0, |
|
"step": 17640 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 1.7222222222222224e-05, |
|
"loss": 0.001, |
|
"step": 17745 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0001, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.003116948762908578, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.6156, |
|
"eval_samples_per_second": 570.218, |
|
"eval_steps_per_second": 17.87, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 17.1, |
|
"learning_rate": 1.6111111111111115e-05, |
|
"loss": 0.0011, |
|
"step": 17955 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 0.0004, |
|
"step": 18060 |
|
}, |
|
{ |
|
"epoch": 17.3, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0003, |
|
"step": 18165 |
|
}, |
|
{ |
|
"epoch": 17.4, |
|
"learning_rate": 1.4444444444444444e-05, |
|
"loss": 0.0001, |
|
"step": 18270 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0001, |
|
"step": 18375 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0004, |
|
"step": 18480 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 1.2777777777777777e-05, |
|
"loss": 0.0007, |
|
"step": 18585 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"learning_rate": 1.2222222222222222e-05, |
|
"loss": 0.0, |
|
"step": 18690 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"learning_rate": 1.1666666666666668e-05, |
|
"loss": 0.0, |
|
"step": 18795 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0007, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.016535792499780655, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.5881, |
|
"eval_samples_per_second": 596.809, |
|
"eval_steps_per_second": 18.703, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"learning_rate": 1.0555555555555555e-05, |
|
"loss": 0.0004, |
|
"step": 19005 |
|
}, |
|
{ |
|
"epoch": 18.2, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0002, |
|
"step": 19110 |
|
}, |
|
{ |
|
"epoch": 18.3, |
|
"learning_rate": 9.444444444444445e-06, |
|
"loss": 0.0001, |
|
"step": 19215 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 8.88888888888889e-06, |
|
"loss": 0.0009, |
|
"step": 19320 |
|
}, |
|
{ |
|
"epoch": 18.5, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0149, |
|
"step": 19425 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 7.777777777777777e-06, |
|
"loss": 0.0003, |
|
"step": 19530 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 7.222222222222222e-06, |
|
"loss": 0.0, |
|
"step": 19635 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0, |
|
"step": 19740 |
|
}, |
|
{ |
|
"epoch": 18.9, |
|
"learning_rate": 6.111111111111111e-06, |
|
"loss": 0.0, |
|
"step": 19845 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.001, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.01935943029820919, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.6346, |
|
"eval_samples_per_second": 553.109, |
|
"eval_steps_per_second": 17.334, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 19.1, |
|
"learning_rate": 5e-06, |
|
"loss": 0.001, |
|
"step": 20055 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 4.444444444444445e-06, |
|
"loss": 0.0, |
|
"step": 20160 |
|
}, |
|
{ |
|
"epoch": 19.3, |
|
"learning_rate": 3.888888888888889e-06, |
|
"loss": 0.0004, |
|
"step": 20265 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0, |
|
"step": 20370 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.0011, |
|
"step": 20475 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 2.2222222222222225e-06, |
|
"loss": 0.0011, |
|
"step": 20580 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.0, |
|
"step": 20685 |
|
}, |
|
{ |
|
"epoch": 19.8, |
|
"learning_rate": 1.1111111111111112e-06, |
|
"loss": 0.0003, |
|
"step": 20790 |
|
}, |
|
{ |
|
"epoch": 19.9, |
|
"learning_rate": 5.555555555555556e-07, |
|
"loss": 0.0004, |
|
"step": 20895 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0001, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.010230864398181438, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.597, |
|
"eval_samples_per_second": 587.939, |
|
"eval_steps_per_second": 18.425, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 21000, |
|
"total_flos": 3.345253371675648e+16, |
|
"train_loss": 0.6543119361943335, |
|
"train_runtime": 2482.634, |
|
"train_samples_per_second": 270.592, |
|
"train_steps_per_second": 8.459 |
|
} |
|
], |
|
"logging_steps": 105, |
|
"max_steps": 21000, |
|
"num_train_epochs": 20, |
|
"save_steps": 210, |
|
"total_flos": 3.345253371675648e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |