{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 9805,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.25497195308516063,
      "grad_norm": 1.5315192937850952,
      "learning_rate": 0.00010193679918450561,
      "loss": 3.8945,
      "step": 500
    },
    {
      "epoch": 0.5099439061703213,
      "grad_norm": 1.6545052528381348,
      "learning_rate": 0.00019956935630099728,
      "loss": 0.5228,
      "step": 1000
    },
    {
      "epoch": 0.7649158592554819,
      "grad_norm": 1.1093895435333252,
      "learning_rate": 0.0001882366273798731,
      "loss": 0.3702,
      "step": 1500
    },
    {
      "epoch": 1.0,
      "eval_cer": 0.13674156347728234,
      "eval_loss": 0.287818968296051,
      "eval_runtime": 242.9432,
      "eval_samples_per_second": 92.038,
      "eval_steps_per_second": 11.505,
      "eval_wer": 0.3335439517042918,
      "step": 1961
    },
    {
      "epoch": 1.0198878123406425,
      "grad_norm": 0.6819602847099304,
      "learning_rate": 0.0001769038984587489,
      "loss": 0.3114,
      "step": 2000
    },
    {
      "epoch": 1.2748597654258031,
      "grad_norm": 0.7735158801078796,
      "learning_rate": 0.00016557116953762465,
      "loss": 0.2571,
      "step": 2500
    },
    {
      "epoch": 1.5298317185109638,
      "grad_norm": 0.7301546931266785,
      "learning_rate": 0.00015423844061650046,
      "loss": 0.2411,
      "step": 3000
    },
    {
      "epoch": 1.7848036715961244,
      "grad_norm": 0.5918829441070557,
      "learning_rate": 0.00014290571169537624,
      "loss": 0.2333,
      "step": 3500
    },
    {
      "epoch": 2.0,
      "eval_cer": 0.12185579510710033,
      "eval_loss": 0.23235103487968445,
      "eval_runtime": 242.1687,
      "eval_samples_per_second": 92.332,
      "eval_steps_per_second": 11.542,
      "eval_wer": 0.26528299554267426,
      "step": 3922
    },
    {
      "epoch": 2.039775624681285,
      "grad_norm": 0.7791718244552612,
      "learning_rate": 0.00013157298277425205,
      "loss": 0.2157,
      "step": 4000
    },
    {
      "epoch": 2.2947475777664454,
      "grad_norm": 0.5816757082939148,
      "learning_rate": 0.00012024025385312785,
      "loss": 0.1817,
      "step": 4500
    },
    {
      "epoch": 2.5497195308516063,
      "grad_norm": 0.42094656825065613,
      "learning_rate": 0.00010890752493200362,
      "loss": 0.1803,
      "step": 5000
    },
    {
      "epoch": 2.804691483936767,
      "grad_norm": 0.515943706035614,
      "learning_rate": 9.757479601087942e-05,
      "loss": 0.172,
      "step": 5500
    },
    {
      "epoch": 3.0,
      "eval_cer": 0.11621454523249779,
      "eval_loss": 0.213576540350914,
      "eval_runtime": 242.2848,
      "eval_samples_per_second": 92.288,
      "eval_steps_per_second": 11.536,
      "eval_wer": 0.246385720074449,
      "step": 5883
    },
    {
      "epoch": 3.0596634370219276,
      "grad_norm": 0.397499680519104,
      "learning_rate": 8.624206708975522e-05,
      "loss": 0.1656,
      "step": 6000
    },
    {
      "epoch": 3.3146353901070884,
      "grad_norm": 0.5948652625083923,
      "learning_rate": 7.490933816863101e-05,
      "loss": 0.1388,
      "step": 6500
    },
    {
      "epoch": 3.569607343192249,
      "grad_norm": 0.5423580408096313,
      "learning_rate": 6.35766092475068e-05,
      "loss": 0.1383,
      "step": 7000
    },
    {
      "epoch": 3.8245792962774097,
      "grad_norm": 1.2003000974655151,
      "learning_rate": 5.22438803263826e-05,
      "loss": 0.1331,
      "step": 7500
    },
    {
      "epoch": 4.0,
      "eval_cer": 0.11265216708611207,
      "eval_loss": 0.20429827272891998,
      "eval_runtime": 242.3631,
      "eval_samples_per_second": 92.258,
      "eval_steps_per_second": 11.532,
      "eval_wer": 0.22865256897179317,
      "step": 7844
    },
    {
      "epoch": 4.07955124936257,
      "grad_norm": 0.8045985102653503,
      "learning_rate": 4.091115140525839e-05,
      "loss": 0.123,
      "step": 8000
    },
    {
      "epoch": 4.3345232024477305,
      "grad_norm": 0.402927964925766,
      "learning_rate": 2.957842248413418e-05,
      "loss": 0.1052,
      "step": 8500
    },
    {
      "epoch": 4.589495155532891,
      "grad_norm": 0.42731374502182007,
      "learning_rate": 1.8245693563009974e-05,
      "loss": 0.1054,
      "step": 9000
    },
    {
      "epoch": 4.844467108618052,
      "grad_norm": 0.2811175286769867,
      "learning_rate": 6.912964641885767e-06,
      "loss": 0.1018,
      "step": 9500
    },
    {
      "epoch": 5.0,
      "eval_cer": 0.10983672160892569,
      "eval_loss": 0.2057330459356308,
      "eval_runtime": 270.0068,
      "eval_samples_per_second": 82.813,
      "eval_steps_per_second": 10.352,
      "eval_wer": 0.21943229191706215,
      "step": 9805
    },
    {
      "epoch": 5.0,
      "step": 9805,
      "total_flos": 5.6911698946882765e+19,
      "train_loss": 0.39030011520891517,
      "train_runtime": 4855.0155,
      "train_samples_per_second": 64.613,
      "train_steps_per_second": 2.02
    }
  ],
  "logging_steps": 500,
  "max_steps": 9805,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.6911698946882765e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}