{
  "best_metric": 1.6565031400746337,
  "best_model_checkpoint": "./checkpoint-1000",
  "epoch": 66.66666666666667,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.67,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 1.5608,
      "step": 25
    },
    {
      "epoch": 1.67,
      "eval_loss": 1.2470210790634155,
      "eval_runtime": 87.1381,
      "eval_samples_per_second": 2.743,
      "eval_steps_per_second": 0.344,
      "eval_wer": 9.884408846818967,
      "step": 25
    },
    {
      "epoch": 3.33,
      "learning_rate": 4.600000000000001e-06,
      "loss": 0.7986,
      "step": 50
    },
    {
      "epoch": 3.33,
      "eval_loss": 0.46967005729675293,
      "eval_runtime": 113.6552,
      "eval_samples_per_second": 2.103,
      "eval_steps_per_second": 0.264,
      "eval_wer": 17.629926276508602,
      "step": 50
    },
    {
      "epoch": 5.0,
      "learning_rate": 7.100000000000001e-06,
      "loss": 0.2654,
      "step": 75
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.23039382696151733,
      "eval_runtime": 129.519,
      "eval_samples_per_second": 1.845,
      "eval_steps_per_second": 0.232,
      "eval_wer": 19.495767725493764,
      "step": 75
    },
    {
      "epoch": 6.67,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.1537,
      "step": 100
    },
    {
      "epoch": 6.67,
      "eval_loss": 0.17667056620121002,
      "eval_runtime": 163.2833,
      "eval_samples_per_second": 1.464,
      "eval_steps_per_second": 0.184,
      "eval_wer": 80.85009556748885,
      "step": 100
    },
    {
      "epoch": 8.33,
      "learning_rate": 9.766666666666667e-06,
      "loss": 0.0873,
      "step": 125
    },
    {
      "epoch": 8.33,
      "eval_loss": 0.1339290887117386,
      "eval_runtime": 144.8884,
      "eval_samples_per_second": 1.65,
      "eval_steps_per_second": 0.207,
      "eval_wer": 12.332756894511695,
      "step": 125
    },
    {
      "epoch": 10.0,
      "learning_rate": 9.48888888888889e-06,
      "loss": 0.0314,
      "step": 150
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.09715640544891357,
      "eval_runtime": 92.9931,
      "eval_samples_per_second": 2.57,
      "eval_steps_per_second": 0.323,
      "eval_wer": 2.0478747610812778,
      "step": 150
    },
    {
      "epoch": 11.67,
      "learning_rate": 9.211111111111111e-06,
      "loss": 0.0063,
      "step": 175
    },
    {
      "epoch": 11.67,
      "eval_loss": 0.11041156202554703,
      "eval_runtime": 92.8999,
      "eval_samples_per_second": 2.573,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6838081368890507,
      "step": 175
    },
    {
      "epoch": 13.33,
      "learning_rate": 8.933333333333333e-06,
      "loss": 0.0037,
      "step": 200
    },
    {
      "epoch": 13.33,
      "eval_loss": 0.11353909224271774,
      "eval_runtime": 93.2005,
      "eval_samples_per_second": 2.564,
      "eval_steps_per_second": 0.322,
      "eval_wer": 1.9386547738236097,
      "step": 200
    },
    {
      "epoch": 15.0,
      "learning_rate": 8.655555555555557e-06,
      "loss": 0.0027,
      "step": 225
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.11416648328304291,
      "eval_runtime": 93.2482,
      "eval_samples_per_second": 2.563,
      "eval_steps_per_second": 0.322,
      "eval_wer": 1.6656048056794392,
      "step": 225
    },
    {
      "epoch": 16.67,
      "learning_rate": 8.377777777777779e-06,
      "loss": 0.0016,
      "step": 250
    },
    {
      "epoch": 16.67,
      "eval_loss": 0.1213652566075325,
      "eval_runtime": 93.3293,
      "eval_samples_per_second": 2.561,
      "eval_steps_per_second": 0.321,
      "eval_wer": 1.847638117775553,
      "step": 250
    },
    {
      "epoch": 18.33,
      "learning_rate": 8.1e-06,
      "loss": 0.0013,
      "step": 275
    },
    {
      "epoch": 18.33,
      "eval_loss": 0.11827071011066437,
      "eval_runtime": 93.0929,
      "eval_samples_per_second": 2.567,
      "eval_steps_per_second": 0.322,
      "eval_wer": 1.6018931464457997,
      "step": 275
    },
    {
      "epoch": 20.0,
      "learning_rate": 7.822222222222224e-06,
      "loss": 0.001,
      "step": 300
    },
    {
      "epoch": 20.0,
      "eval_loss": 0.1215524896979332,
      "eval_runtime": 93.0458,
      "eval_samples_per_second": 2.569,
      "eval_steps_per_second": 0.322,
      "eval_wer": 1.7384181305178845,
      "step": 300
    },
    {
      "epoch": 21.67,
      "learning_rate": 7.544444444444445e-06,
      "loss": 0.0009,
      "step": 325
    },
    {
      "epoch": 21.67,
      "eval_loss": 0.12276798486709595,
      "eval_runtime": 92.9939,
      "eval_samples_per_second": 2.57,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6291981432602167,
      "step": 325
    },
    {
      "epoch": 23.33,
      "learning_rate": 7.266666666666668e-06,
      "loss": 0.0008,
      "step": 350
    },
    {
      "epoch": 23.33,
      "eval_loss": 0.12465143948793411,
      "eval_runtime": 92.9329,
      "eval_samples_per_second": 2.572,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6656048056794392,
      "step": 350
    },
    {
      "epoch": 25.0,
      "learning_rate": 6.9888888888888895e-06,
      "loss": 0.0008,
      "step": 375
    },
    {
      "epoch": 25.0,
      "eval_loss": 0.12543533742427826,
      "eval_runtime": 93.1244,
      "eval_samples_per_second": 2.566,
      "eval_steps_per_second": 0.322,
      "eval_wer": 1.647401474469828,
      "step": 375
    },
    {
      "epoch": 26.67,
      "learning_rate": 6.711111111111111e-06,
      "loss": 0.0007,
      "step": 400
    },
    {
      "epoch": 26.67,
      "eval_loss": 0.12520712614059448,
      "eval_runtime": 93.0139,
      "eval_samples_per_second": 2.57,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6291981432602167,
      "step": 400
    },
    {
      "epoch": 28.33,
      "learning_rate": 6.433333333333333e-06,
      "loss": 0.0007,
      "step": 425
    },
    {
      "epoch": 28.33,
      "eval_loss": 0.1271507740020752,
      "eval_runtime": 92.9162,
      "eval_samples_per_second": 2.572,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6291981432602167,
      "step": 425
    },
    {
      "epoch": 30.0,
      "learning_rate": 6.155555555555556e-06,
      "loss": 0.0007,
      "step": 450
    },
    {
      "epoch": 30.0,
      "eval_loss": 0.12834006547927856,
      "eval_runtime": 93.0797,
      "eval_samples_per_second": 2.568,
      "eval_steps_per_second": 0.322,
      "eval_wer": 1.702011468098662,
      "step": 450
    },
    {
      "epoch": 31.67,
      "learning_rate": 5.877777777777778e-06,
      "loss": 0.0006,
      "step": 475
    },
    {
      "epoch": 31.67,
      "eval_loss": 0.12884125113487244,
      "eval_runtime": 92.7701,
      "eval_samples_per_second": 2.576,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6656048056794392,
      "step": 475
    },
    {
      "epoch": 33.33,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.0006,
      "step": 500
    },
    {
      "epoch": 33.33,
      "eval_loss": 0.12947146594524384,
      "eval_runtime": 92.971,
      "eval_samples_per_second": 2.571,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.702011468098662,
      "step": 500
    },
    {
      "epoch": 35.0,
      "learning_rate": 5.322222222222223e-06,
      "loss": 0.0006,
      "step": 525
    },
    {
      "epoch": 35.0,
      "eval_loss": 0.12980106472969055,
      "eval_runtime": 92.8564,
      "eval_samples_per_second": 2.574,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6656048056794392,
      "step": 525
    },
    {
      "epoch": 36.67,
      "learning_rate": 5.044444444444445e-06,
      "loss": 0.0006,
      "step": 550
    },
    {
      "epoch": 36.67,
      "eval_loss": 0.13186444342136383,
      "eval_runtime": 93.0759,
      "eval_samples_per_second": 2.568,
      "eval_steps_per_second": 0.322,
      "eval_wer": 1.702011468098662,
      "step": 550
    },
    {
      "epoch": 38.33,
      "learning_rate": 4.766666666666667e-06,
      "loss": 0.0006,
      "step": 575
    },
    {
      "epoch": 38.33,
      "eval_loss": 0.13074839115142822,
      "eval_runtime": 93.1217,
      "eval_samples_per_second": 2.567,
      "eval_steps_per_second": 0.322,
      "eval_wer": 1.729316464913079,
      "step": 575
    },
    {
      "epoch": 40.0,
      "learning_rate": 4.488888888888889e-06,
      "loss": 0.0005,
      "step": 600
    },
    {
      "epoch": 40.0,
      "eval_loss": 0.1326516568660736,
      "eval_runtime": 92.8713,
      "eval_samples_per_second": 2.573,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.7475197961226905,
      "step": 600
    },
    {
      "epoch": 41.67,
      "learning_rate": 4.211111111111112e-06,
      "loss": 0.0005,
      "step": 625
    },
    {
      "epoch": 41.67,
      "eval_loss": 0.13262800872325897,
      "eval_runtime": 92.9841,
      "eval_samples_per_second": 2.57,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.702011468098662,
      "step": 625
    },
    {
      "epoch": 43.33,
      "learning_rate": 3.9333333333333335e-06,
      "loss": 0.0005,
      "step": 650
    },
    {
      "epoch": 43.33,
      "eval_loss": 0.13290846347808838,
      "eval_runtime": 92.8115,
      "eval_samples_per_second": 2.575,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.647401474469828,
      "step": 650
    },
    {
      "epoch": 45.0,
      "learning_rate": 3.6555555555555562e-06,
      "loss": 0.0005,
      "step": 675
    },
    {
      "epoch": 45.0,
      "eval_loss": 0.1338340789079666,
      "eval_runtime": 93.0235,
      "eval_samples_per_second": 2.569,
      "eval_steps_per_second": 0.322,
      "eval_wer": 1.702011468098662,
      "step": 675
    },
    {
      "epoch": 46.67,
      "learning_rate": 3.377777777777778e-06,
      "loss": 0.0005,
      "step": 700
    },
    {
      "epoch": 46.67,
      "eval_loss": 0.1339310258626938,
      "eval_runtime": 92.9221,
      "eval_samples_per_second": 2.572,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.674706471284245,
      "step": 700
    },
    {
      "epoch": 48.33,
      "learning_rate": 3.1000000000000004e-06,
      "loss": 0.0005,
      "step": 725
    },
    {
      "epoch": 48.33,
      "eval_loss": 0.13417784869670868,
      "eval_runtime": 92.8791,
      "eval_samples_per_second": 2.573,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.7111131337034675,
      "step": 725
    },
    {
      "epoch": 50.0,
      "learning_rate": 2.8222222222222223e-06,
      "loss": 0.0005,
      "step": 750
    },
    {
      "epoch": 50.0,
      "eval_loss": 0.13492102921009064,
      "eval_runtime": 92.778,
      "eval_samples_per_second": 2.576,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.7111131337034675,
      "step": 750
    },
    {
      "epoch": 51.67,
      "learning_rate": 2.5444444444444446e-06,
      "loss": 0.0004,
      "step": 775
    },
    {
      "epoch": 51.67,
      "eval_loss": 0.13491509854793549,
      "eval_runtime": 92.9969,
      "eval_samples_per_second": 2.57,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.7111131337034675,
      "step": 775
    },
    {
      "epoch": 53.33,
      "learning_rate": 2.266666666666667e-06,
      "loss": 0.0005,
      "step": 800
    },
    {
      "epoch": 53.33,
      "eval_loss": 0.13460427522659302,
      "eval_runtime": 92.9646,
      "eval_samples_per_second": 2.571,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6565031400746337,
      "step": 800
    },
    {
      "epoch": 55.0,
      "learning_rate": 1.988888888888889e-06,
      "loss": 0.0005,
      "step": 825
    },
    {
      "epoch": 55.0,
      "eval_loss": 0.13541632890701294,
      "eval_runtime": 92.8779,
      "eval_samples_per_second": 2.573,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.7111131337034675,
      "step": 825
    },
    {
      "epoch": 56.67,
      "learning_rate": 1.7111111111111112e-06,
      "loss": 0.0004,
      "step": 850
    },
    {
      "epoch": 56.67,
      "eval_loss": 0.13564594089984894,
      "eval_runtime": 92.7968,
      "eval_samples_per_second": 2.576,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.7111131337034675,
      "step": 850
    },
    {
      "epoch": 58.33,
      "learning_rate": 1.4333333333333335e-06,
      "loss": 0.0004,
      "step": 875
    },
    {
      "epoch": 58.33,
      "eval_loss": 0.13617311418056488,
      "eval_runtime": 93.0052,
      "eval_samples_per_second": 2.57,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6565031400746337,
      "step": 875
    },
    {
      "epoch": 60.0,
      "learning_rate": 1.1555555555555556e-06,
      "loss": 0.0004,
      "step": 900
    },
    {
      "epoch": 60.0,
      "eval_loss": 0.13621629774570465,
      "eval_runtime": 92.7177,
      "eval_samples_per_second": 2.578,
      "eval_steps_per_second": 0.324,
      "eval_wer": 1.7111131337034675,
      "step": 900
    },
    {
      "epoch": 61.67,
      "learning_rate": 8.777777777777778e-07,
      "loss": 0.0004,
      "step": 925
    },
    {
      "epoch": 61.67,
      "eval_loss": 0.1365342140197754,
      "eval_runtime": 92.7764,
      "eval_samples_per_second": 2.576,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6565031400746337,
      "step": 925
    },
    {
      "epoch": 63.33,
      "learning_rate": 6.000000000000001e-07,
      "loss": 0.0004,
      "step": 950
    },
    {
      "epoch": 63.33,
      "eval_loss": 0.13664031028747559,
      "eval_runtime": 92.9018,
      "eval_samples_per_second": 2.573,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6565031400746337,
      "step": 950
    },
    {
      "epoch": 65.0,
      "learning_rate": 3.2222222222222227e-07,
      "loss": 0.0004,
      "step": 975
    },
    {
      "epoch": 65.0,
      "eval_loss": 0.13669337332248688,
      "eval_runtime": 93.0078,
      "eval_samples_per_second": 2.57,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6565031400746337,
      "step": 975
    },
    {
      "epoch": 66.67,
      "learning_rate": 4.444444444444445e-08,
      "loss": 0.0004,
      "step": 1000
    },
    {
      "epoch": 66.67,
      "eval_loss": 0.1367739737033844,
      "eval_runtime": 92.9503,
      "eval_samples_per_second": 2.571,
      "eval_steps_per_second": 0.323,
      "eval_wer": 1.6565031400746337,
      "step": 1000
    }
  ],
  "max_steps": 1000,
  "num_train_epochs": 67,
  "total_flos": 1.839327907479552e+19,
  "trial_name": null,
  "trial_params": null
}