|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.5209125475285172, |
|
"eval_steps": 100, |
|
"global_step": 1600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.2933333333333334e-06, |
|
"loss": 19.0829, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 19.174156188964844, |
|
"eval_runtime": 65.9422, |
|
"eval_samples_per_second": 6.491, |
|
"eval_steps_per_second": 0.819, |
|
"eval_wer": 1.0007501875468867, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.6e-06, |
|
"loss": 18.8941, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 18.407485961914062, |
|
"eval_runtime": 66.0283, |
|
"eval_samples_per_second": 6.482, |
|
"eval_steps_per_second": 0.818, |
|
"eval_wer": 1.009627406851713, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9333333333333335e-06, |
|
"loss": 17.5359, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 17.08553123474121, |
|
"eval_runtime": 65.7138, |
|
"eval_samples_per_second": 6.513, |
|
"eval_steps_per_second": 0.822, |
|
"eval_wer": 1.248312078019505, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.2666666666666665e-06, |
|
"loss": 16.0015, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 15.136056900024414, |
|
"eval_runtime": 65.1801, |
|
"eval_samples_per_second": 6.566, |
|
"eval_steps_per_second": 0.828, |
|
"eval_wer": 1.3134533633408352, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 6.5866666666666666e-06, |
|
"loss": 13.3605, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 12.500285148620605, |
|
"eval_runtime": 65.5504, |
|
"eval_samples_per_second": 6.529, |
|
"eval_steps_per_second": 0.824, |
|
"eval_wer": 1.0840210052513128, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.92e-06, |
|
"loss": 10.8925, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 9.41340160369873, |
|
"eval_runtime": 64.6713, |
|
"eval_samples_per_second": 6.618, |
|
"eval_steps_per_second": 0.835, |
|
"eval_wer": 1.0077519379844961, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.253333333333333e-06, |
|
"loss": 8.1129, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 6.854950904846191, |
|
"eval_runtime": 65.9954, |
|
"eval_samples_per_second": 6.485, |
|
"eval_steps_per_second": 0.818, |
|
"eval_wer": 1.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.0586666666666668e-05, |
|
"loss": 6.0586, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 5.390443801879883, |
|
"eval_runtime": 65.312, |
|
"eval_samples_per_second": 6.553, |
|
"eval_steps_per_second": 0.827, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.1920000000000001e-05, |
|
"loss": 5.1935, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 4.793299674987793, |
|
"eval_runtime": 66.778, |
|
"eval_samples_per_second": 6.409, |
|
"eval_steps_per_second": 0.809, |
|
"eval_wer": 1.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3253333333333334e-05, |
|
"loss": 4.7315, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 4.52764368057251, |
|
"eval_runtime": 66.1481, |
|
"eval_samples_per_second": 6.47, |
|
"eval_steps_per_second": 0.816, |
|
"eval_wer": 0.9998749687421855, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.4586666666666667e-05, |
|
"loss": 4.4273, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 4.082287788391113, |
|
"eval_runtime": 65.6019, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 0.823, |
|
"eval_wer": 1.0303825956489123, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.5920000000000003e-05, |
|
"loss": 4.0406, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 3.9247243404388428, |
|
"eval_runtime": 64.106, |
|
"eval_samples_per_second": 6.676, |
|
"eval_steps_per_second": 0.842, |
|
"eval_wer": 1.0102525631407853, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.7253333333333336e-05, |
|
"loss": 3.9165, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 3.870806932449341, |
|
"eval_runtime": 65.1335, |
|
"eval_samples_per_second": 6.571, |
|
"eval_steps_per_second": 0.829, |
|
"eval_wer": 1.0046261565391348, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.858666666666667e-05, |
|
"loss": 3.9965, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 3.8457767963409424, |
|
"eval_runtime": 65.82, |
|
"eval_samples_per_second": 6.503, |
|
"eval_steps_per_second": 0.82, |
|
"eval_wer": 1.0012503125781445, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.9920000000000002e-05, |
|
"loss": 3.8683, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 3.8101820945739746, |
|
"eval_runtime": 65.6088, |
|
"eval_samples_per_second": 6.524, |
|
"eval_steps_per_second": 0.823, |
|
"eval_wer": 1.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.1253333333333335e-05, |
|
"loss": 3.8863, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 3.795001268386841, |
|
"eval_runtime": 65.0097, |
|
"eval_samples_per_second": 6.584, |
|
"eval_steps_per_second": 0.831, |
|
"eval_wer": 1.0037509377344336, |
|
"step": 1600 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 21040, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 200, |
|
"total_flos": 3.1104086860681257e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|