|
{ |
|
"best_metric": 28.648953267516852, |
|
"best_model_checkpoint": "./checkpoint-5000", |
|
"epoch": 65.012, |
|
"eval_steps": 500, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.8e-08, |
|
"loss": 1.6929, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.8e-08, |
|
"loss": 1.6161, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.4799999999999998e-07, |
|
"loss": 1.5665, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.98e-07, |
|
"loss": 1.5875, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.48e-07, |
|
"loss": 1.4633, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.98e-07, |
|
"loss": 1.3517, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.4799999999999994e-07, |
|
"loss": 1.3473, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.98e-07, |
|
"loss": 1.2353, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 4.48e-07, |
|
"loss": 1.1232, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.979999999999999e-07, |
|
"loss": 1.0898, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 5.48e-07, |
|
"loss": 0.9945, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 5.979999999999999e-07, |
|
"loss": 0.8935, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 6.48e-07, |
|
"loss": 0.8898, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.979999999999999e-07, |
|
"loss": 0.8355, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 7.48e-07, |
|
"loss": 0.7488, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 7.98e-07, |
|
"loss": 0.7638, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 8.48e-07, |
|
"loss": 0.7228, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 8.98e-07, |
|
"loss": 0.6542, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 9.479999999999999e-07, |
|
"loss": 0.6631, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 9.98e-07, |
|
"loss": 0.6425, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"eval_loss": 0.702507495880127, |
|
"eval_runtime": 521.2801, |
|
"eval_samples_per_second": 5.96, |
|
"eval_steps_per_second": 0.188, |
|
"eval_wer": 41.44768424787627, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 9.946666666666666e-07, |
|
"loss": 0.5748, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 9.891111111111112e-07, |
|
"loss": 0.5951, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 9.835555555555554e-07, |
|
"loss": 0.5775, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 9.78e-07, |
|
"loss": 0.5234, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 9.724444444444444e-07, |
|
"loss": 0.5375, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 9.668888888888888e-07, |
|
"loss": 0.5398, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 9.613333333333334e-07, |
|
"loss": 0.479, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 9.557777777777776e-07, |
|
"loss": 0.4986, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 9.502222222222221e-07, |
|
"loss": 0.5009, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 9.446666666666666e-07, |
|
"loss": 0.4526, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 9.39111111111111e-07, |
|
"loss": 0.4678, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 9.335555555555555e-07, |
|
"loss": 0.4692, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 9.28e-07, |
|
"loss": 0.4286, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 9.224444444444443e-07, |
|
"loss": 0.4415, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 9.168888888888889e-07, |
|
"loss": 0.4503, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 9.113333333333333e-07, |
|
"loss": 0.4035, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 9.057777777777778e-07, |
|
"loss": 0.4175, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 9.002222222222222e-07, |
|
"loss": 0.4333, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 8.946666666666667e-07, |
|
"loss": 0.3904, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 8.89111111111111e-07, |
|
"loss": 0.3973, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.5367385149002075, |
|
"eval_runtime": 333.3595, |
|
"eval_samples_per_second": 9.32, |
|
"eval_steps_per_second": 0.294, |
|
"eval_wer": 33.96923462252927, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 8.835555555555555e-07, |
|
"loss": 0.4134, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 8.78e-07, |
|
"loss": 0.3698, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 8.724444444444444e-07, |
|
"loss": 0.3784, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 8.668888888888889e-07, |
|
"loss": 0.4007, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 8.613333333333332e-07, |
|
"loss": 0.3591, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 8.557777777777777e-07, |
|
"loss": 0.3615, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 8.502222222222221e-07, |
|
"loss": 0.3856, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 8.446666666666666e-07, |
|
"loss": 0.3527, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 8.391111111111111e-07, |
|
"loss": 0.3435, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 8.335555555555555e-07, |
|
"loss": 0.3771, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 8.28e-07, |
|
"loss": 0.3343, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 8.224444444444444e-07, |
|
"loss": 0.3335, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 8.168888888888889e-07, |
|
"loss": 0.363, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 8.113333333333333e-07, |
|
"loss": 0.3262, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 8.057777777777778e-07, |
|
"loss": 0.3186, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 8.002222222222222e-07, |
|
"loss": 0.3554, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 7.946666666666666e-07, |
|
"loss": 0.3174, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 7.891111111111111e-07, |
|
"loss": 0.3104, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 7.835555555555555e-07, |
|
"loss": 0.3405, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 7.78e-07, |
|
"loss": 0.3125, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"eval_loss": 0.4926978647708893, |
|
"eval_runtime": 338.2106, |
|
"eval_samples_per_second": 9.187, |
|
"eval_steps_per_second": 0.29, |
|
"eval_wer": 31.44580576485567, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 7.724444444444444e-07, |
|
"loss": 0.2975, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 7.668888888888888e-07, |
|
"loss": 0.335, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 7.613333333333333e-07, |
|
"loss": 0.3028, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 7.557777777777777e-07, |
|
"loss": 0.291, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 7.502222222222222e-07, |
|
"loss": 0.3219, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 7.446666666666666e-07, |
|
"loss": 0.2992, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 7.391111111111111e-07, |
|
"loss": 0.2821, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 7.335555555555554e-07, |
|
"loss": 0.3121, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 7.28e-07, |
|
"loss": 0.2933, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 7.224444444444445e-07, |
|
"loss": 0.2704, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 7.168888888888889e-07, |
|
"loss": 0.3079, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 7.113333333333334e-07, |
|
"loss": 0.284, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 7.057777777777777e-07, |
|
"loss": 0.2646, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 7.002222222222222e-07, |
|
"loss": 0.2982, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 6.946666666666666e-07, |
|
"loss": 0.2794, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 6.891111111111111e-07, |
|
"loss": 0.26, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 6.835555555555555e-07, |
|
"loss": 0.294, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 6.78e-07, |
|
"loss": 0.2774, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 25.02, |
|
"learning_rate": 6.724444444444444e-07, |
|
"loss": 0.2473, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 6.668888888888888e-07, |
|
"loss": 0.2848, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.47389376163482666, |
|
"eval_runtime": 332.7237, |
|
"eval_samples_per_second": 9.338, |
|
"eval_steps_per_second": 0.295, |
|
"eval_wer": 30.103734006804284, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 6.613333333333333e-07, |
|
"loss": 0.2707, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 6.557777777777777e-07, |
|
"loss": 0.2461, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 6.502222222222222e-07, |
|
"loss": 0.2753, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 6.446666666666666e-07, |
|
"loss": 0.2666, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 6.39111111111111e-07, |
|
"loss": 0.2428, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 6.335555555555556e-07, |
|
"loss": 0.2672, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 6.28e-07, |
|
"loss": 0.2628, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 6.224444444444445e-07, |
|
"loss": 0.2378, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 6.168888888888889e-07, |
|
"loss": 0.2615, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 6.113333333333333e-07, |
|
"loss": 0.2566, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 6.057777777777777e-07, |
|
"loss": 0.2334, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 6.002222222222222e-07, |
|
"loss": 0.2507, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 5.946666666666667e-07, |
|
"loss": 0.2576, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 5.891111111111111e-07, |
|
"loss": 0.2266, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 5.835555555555556e-07, |
|
"loss": 0.2519, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 5.779999999999999e-07, |
|
"loss": 0.2478, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 5.724444444444444e-07, |
|
"loss": 0.2257, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 5.668888888888888e-07, |
|
"loss": 0.2446, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 5.613333333333333e-07, |
|
"loss": 0.2452, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 5.557777777777778e-07, |
|
"loss": 0.2201, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"eval_loss": 0.46750521659851074, |
|
"eval_runtime": 335.6468, |
|
"eval_samples_per_second": 9.257, |
|
"eval_steps_per_second": 0.292, |
|
"eval_wer": 29.485921813362275, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 5.502222222222221e-07, |
|
"loss": 0.2385, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 5.446666666666666e-07, |
|
"loss": 0.2409, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 5.39111111111111e-07, |
|
"loss": 0.2203, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 5.335555555555556e-07, |
|
"loss": 0.2328, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 5.28e-07, |
|
"loss": 0.24, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 5.224444444444445e-07, |
|
"loss": 0.2136, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 5.168888888888888e-07, |
|
"loss": 0.2286, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 5.113333333333333e-07, |
|
"loss": 0.2334, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 5.057777777777778e-07, |
|
"loss": 0.2142, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 5.002222222222222e-07, |
|
"loss": 0.2218, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 4.946666666666666e-07, |
|
"loss": 0.2342, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 4.891111111111111e-07, |
|
"loss": 0.2074, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 4.835555555555555e-07, |
|
"loss": 0.2169, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 4.779999999999999e-07, |
|
"loss": 0.2273, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 4.724444444444444e-07, |
|
"loss": 0.2087, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 4.668888888888889e-07, |
|
"loss": 0.2139, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 4.613333333333333e-07, |
|
"loss": 0.2256, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 4.5577777777777774e-07, |
|
"loss": 0.2044, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 4.5022222222222225e-07, |
|
"loss": 0.2059, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 4.4466666666666665e-07, |
|
"loss": 0.2257, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"eval_loss": 0.46374234557151794, |
|
"eval_runtime": 331.7281, |
|
"eval_samples_per_second": 9.366, |
|
"eval_steps_per_second": 0.295, |
|
"eval_wer": 28.99334182129365, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 4.391111111111111e-07, |
|
"loss": 0.2031, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 4.3355555555555556e-07, |
|
"loss": 0.205, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 4.2799999999999997e-07, |
|
"loss": 0.2182, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 40.01, |
|
"learning_rate": 4.224444444444444e-07, |
|
"loss": 0.2037, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 4.1688888888888883e-07, |
|
"loss": 0.1995, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 4.113333333333333e-07, |
|
"loss": 0.2205, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 41.01, |
|
"learning_rate": 4.057777777777778e-07, |
|
"loss": 0.1982, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 4.002222222222222e-07, |
|
"loss": 0.1954, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 3.9466666666666665e-07, |
|
"loss": 0.2145, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 3.891111111111111e-07, |
|
"loss": 0.1975, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 3.835555555555555e-07, |
|
"loss": 0.1911, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 3.7799999999999997e-07, |
|
"loss": 0.2149, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 43.01, |
|
"learning_rate": 3.7244444444444443e-07, |
|
"loss": 0.1959, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 3.668888888888889e-07, |
|
"loss": 0.1898, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 3.6133333333333334e-07, |
|
"loss": 0.2104, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 44.01, |
|
"learning_rate": 3.557777777777778e-07, |
|
"loss": 0.1947, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 3.502222222222222e-07, |
|
"loss": 0.1863, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 3.4466666666666666e-07, |
|
"loss": 0.2093, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 3.3911111111111106e-07, |
|
"loss": 0.1914, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 3.335555555555555e-07, |
|
"loss": 0.1837, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 0.46572384238243103, |
|
"eval_runtime": 334.5411, |
|
"eval_samples_per_second": 9.287, |
|
"eval_steps_per_second": 0.293, |
|
"eval_wer": 28.914028093757178, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 3.28e-07, |
|
"loss": 0.2041, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 46.01, |
|
"learning_rate": 3.2244444444444443e-07, |
|
"loss": 0.1936, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 3.168888888888889e-07, |
|
"loss": 0.1808, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 3.1133333333333334e-07, |
|
"loss": 0.2032, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 47.01, |
|
"learning_rate": 3.0577777777777775e-07, |
|
"loss": 0.1924, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 3.002222222222222e-07, |
|
"loss": 0.1767, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 2.9466666666666666e-07, |
|
"loss": 0.2025, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 48.01, |
|
"learning_rate": 2.8911111111111106e-07, |
|
"loss": 0.1902, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 2.8355555555555557e-07, |
|
"loss": 0.174, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 2.7800000000000003e-07, |
|
"loss": 0.2009, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 2.7244444444444443e-07, |
|
"loss": 0.1884, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 49.02, |
|
"learning_rate": 2.668888888888889e-07, |
|
"loss": 0.1728, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"learning_rate": 2.613333333333333e-07, |
|
"loss": 0.1984, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 50.01, |
|
"learning_rate": 2.5577777777777775e-07, |
|
"loss": 0.1884, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 50.02, |
|
"learning_rate": 2.502222222222222e-07, |
|
"loss": 0.1717, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 2.4466666666666666e-07, |
|
"loss": 0.1927, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 51.01, |
|
"learning_rate": 2.391111111111111e-07, |
|
"loss": 0.1909, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 51.01, |
|
"learning_rate": 2.3355555555555552e-07, |
|
"loss": 0.1698, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 2.28e-07, |
|
"loss": 0.1901, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"learning_rate": 2.2244444444444443e-07, |
|
"loss": 0.1897, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"eval_loss": 0.4658238887786865, |
|
"eval_runtime": 335.2394, |
|
"eval_samples_per_second": 9.268, |
|
"eval_steps_per_second": 0.292, |
|
"eval_wer": 28.74496462190311, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 52.01, |
|
"learning_rate": 2.1688888888888886e-07, |
|
"loss": 0.1677, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 2.1133333333333335e-07, |
|
"loss": 0.1899, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"learning_rate": 2.0577777777777778e-07, |
|
"loss": 0.1883, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 53.01, |
|
"learning_rate": 2.002222222222222e-07, |
|
"loss": 0.1679, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 1.9466666666666664e-07, |
|
"loss": 0.1869, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"learning_rate": 1.8911111111111112e-07, |
|
"loss": 0.1881, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 54.01, |
|
"learning_rate": 1.8355555555555555e-07, |
|
"loss": 0.1673, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 1.7799999999999998e-07, |
|
"loss": 0.1847, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"learning_rate": 1.7244444444444444e-07, |
|
"loss": 0.1876, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 55.01, |
|
"learning_rate": 1.668888888888889e-07, |
|
"loss": 0.1667, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 1.6133333333333332e-07, |
|
"loss": 0.1807, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"learning_rate": 1.5577777777777775e-07, |
|
"loss": 0.1851, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 56.01, |
|
"learning_rate": 1.5022222222222224e-07, |
|
"loss": 0.1705, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 1.4466666666666667e-07, |
|
"loss": 0.1773, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"learning_rate": 1.391111111111111e-07, |
|
"loss": 0.1905, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 57.01, |
|
"learning_rate": 1.3355555555555555e-07, |
|
"loss": 0.1629, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 1.28e-07, |
|
"loss": 0.1774, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"learning_rate": 1.2244444444444444e-07, |
|
"loss": 0.1853, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 58.01, |
|
"learning_rate": 1.1688888888888888e-07, |
|
"loss": 0.1659, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 1.1133333333333334e-07, |
|
"loss": 0.1764, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_loss": 0.4676055610179901, |
|
"eval_runtime": 335.034, |
|
"eval_samples_per_second": 9.274, |
|
"eval_steps_per_second": 0.293, |
|
"eval_wer": 28.717830978272215, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"learning_rate": 1.0577777777777777e-07, |
|
"loss": 0.1844, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 59.01, |
|
"learning_rate": 1.0022222222222222e-07, |
|
"loss": 0.1682, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 9.466666666666665e-08, |
|
"loss": 0.174, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 60.01, |
|
"learning_rate": 8.911111111111111e-08, |
|
"loss": 0.1835, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 60.01, |
|
"learning_rate": 8.355555555555554e-08, |
|
"loss": 0.166, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"learning_rate": 7.8e-08, |
|
"loss": 0.1747, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 61.01, |
|
"learning_rate": 7.244444444444445e-08, |
|
"loss": 0.1837, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 61.01, |
|
"learning_rate": 6.688888888888888e-08, |
|
"loss": 0.1673, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 6.133333333333333e-08, |
|
"loss": 0.1741, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 62.01, |
|
"learning_rate": 5.5777777777777777e-08, |
|
"loss": 0.1816, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 62.01, |
|
"learning_rate": 5.022222222222222e-08, |
|
"loss": 0.166, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"learning_rate": 4.466666666666666e-08, |
|
"loss": 0.1725, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 63.01, |
|
"learning_rate": 3.9111111111111106e-08, |
|
"loss": 0.1849, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 63.01, |
|
"learning_rate": 3.355555555555555e-08, |
|
"loss": 0.1655, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 2.8e-08, |
|
"loss": 0.1708, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 64.01, |
|
"learning_rate": 2.2444444444444442e-08, |
|
"loss": 0.1835, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 64.01, |
|
"learning_rate": 1.6888888888888886e-08, |
|
"loss": 0.1664, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 1.1333333333333334e-08, |
|
"loss": 0.1673, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 65.01, |
|
"learning_rate": 5.777777777777777e-09, |
|
"loss": 0.1839, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 65.01, |
|
"learning_rate": 2.2222222222222221e-10, |
|
"loss": 0.1681, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 65.01, |
|
"eval_loss": 0.46787169575691223, |
|
"eval_runtime": 334.6695, |
|
"eval_samples_per_second": 9.284, |
|
"eval_steps_per_second": 0.293, |
|
"eval_wer": 28.648953267516852, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 65.01, |
|
"step": 5000, |
|
"total_flos": 2.06793125756928e+19, |
|
"train_loss": 0.34305178251266477, |
|
"train_runtime": 22591.6172, |
|
"train_samples_per_second": 14.165, |
|
"train_steps_per_second": 0.221 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 2.06793125756928e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|