hiba2's picture
Training in progress, step 200
ce20d4f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.5209125475285172,
"eval_steps": 100,
"global_step": 1600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 1.2933333333333334e-06,
"loss": 19.0829,
"step": 100
},
{
"epoch": 0.1,
"eval_loss": 19.174156188964844,
"eval_runtime": 65.9422,
"eval_samples_per_second": 6.491,
"eval_steps_per_second": 0.819,
"eval_wer": 1.0007501875468867,
"step": 100
},
{
"epoch": 0.19,
"learning_rate": 2.6e-06,
"loss": 18.8941,
"step": 200
},
{
"epoch": 0.19,
"eval_loss": 18.407485961914062,
"eval_runtime": 66.0283,
"eval_samples_per_second": 6.482,
"eval_steps_per_second": 0.818,
"eval_wer": 1.009627406851713,
"step": 200
},
{
"epoch": 0.29,
"learning_rate": 3.9333333333333335e-06,
"loss": 17.5359,
"step": 300
},
{
"epoch": 0.29,
"eval_loss": 17.08553123474121,
"eval_runtime": 65.7138,
"eval_samples_per_second": 6.513,
"eval_steps_per_second": 0.822,
"eval_wer": 1.248312078019505,
"step": 300
},
{
"epoch": 0.38,
"learning_rate": 5.2666666666666665e-06,
"loss": 16.0015,
"step": 400
},
{
"epoch": 0.38,
"eval_loss": 15.136056900024414,
"eval_runtime": 65.1801,
"eval_samples_per_second": 6.566,
"eval_steps_per_second": 0.828,
"eval_wer": 1.3134533633408352,
"step": 400
},
{
"epoch": 0.48,
"learning_rate": 6.5866666666666666e-06,
"loss": 13.3605,
"step": 500
},
{
"epoch": 0.48,
"eval_loss": 12.500285148620605,
"eval_runtime": 65.5504,
"eval_samples_per_second": 6.529,
"eval_steps_per_second": 0.824,
"eval_wer": 1.0840210052513128,
"step": 500
},
{
"epoch": 0.57,
"learning_rate": 7.92e-06,
"loss": 10.8925,
"step": 600
},
{
"epoch": 0.57,
"eval_loss": 9.41340160369873,
"eval_runtime": 64.6713,
"eval_samples_per_second": 6.618,
"eval_steps_per_second": 0.835,
"eval_wer": 1.0077519379844961,
"step": 600
},
{
"epoch": 0.67,
"learning_rate": 9.253333333333333e-06,
"loss": 8.1129,
"step": 700
},
{
"epoch": 0.67,
"eval_loss": 6.854950904846191,
"eval_runtime": 65.9954,
"eval_samples_per_second": 6.485,
"eval_steps_per_second": 0.818,
"eval_wer": 1.0,
"step": 700
},
{
"epoch": 0.76,
"learning_rate": 1.0586666666666668e-05,
"loss": 6.0586,
"step": 800
},
{
"epoch": 0.76,
"eval_loss": 5.390443801879883,
"eval_runtime": 65.312,
"eval_samples_per_second": 6.553,
"eval_steps_per_second": 0.827,
"eval_wer": 1.0,
"step": 800
},
{
"epoch": 0.86,
"learning_rate": 1.1920000000000001e-05,
"loss": 5.1935,
"step": 900
},
{
"epoch": 0.86,
"eval_loss": 4.793299674987793,
"eval_runtime": 66.778,
"eval_samples_per_second": 6.409,
"eval_steps_per_second": 0.809,
"eval_wer": 1.0,
"step": 900
},
{
"epoch": 0.95,
"learning_rate": 1.3253333333333334e-05,
"loss": 4.7315,
"step": 1000
},
{
"epoch": 0.95,
"eval_loss": 4.52764368057251,
"eval_runtime": 66.1481,
"eval_samples_per_second": 6.47,
"eval_steps_per_second": 0.816,
"eval_wer": 0.9998749687421855,
"step": 1000
},
{
"epoch": 1.05,
"learning_rate": 1.4586666666666667e-05,
"loss": 4.4273,
"step": 1100
},
{
"epoch": 1.05,
"eval_loss": 4.082287788391113,
"eval_runtime": 65.6019,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 0.823,
"eval_wer": 1.0303825956489123,
"step": 1100
},
{
"epoch": 1.14,
"learning_rate": 1.5920000000000003e-05,
"loss": 4.0406,
"step": 1200
},
{
"epoch": 1.14,
"eval_loss": 3.9247243404388428,
"eval_runtime": 64.106,
"eval_samples_per_second": 6.676,
"eval_steps_per_second": 0.842,
"eval_wer": 1.0102525631407853,
"step": 1200
},
{
"epoch": 1.24,
"learning_rate": 1.7253333333333336e-05,
"loss": 3.9165,
"step": 1300
},
{
"epoch": 1.24,
"eval_loss": 3.870806932449341,
"eval_runtime": 65.1335,
"eval_samples_per_second": 6.571,
"eval_steps_per_second": 0.829,
"eval_wer": 1.0046261565391348,
"step": 1300
},
{
"epoch": 1.33,
"learning_rate": 1.858666666666667e-05,
"loss": 3.9965,
"step": 1400
},
{
"epoch": 1.33,
"eval_loss": 3.8457767963409424,
"eval_runtime": 65.82,
"eval_samples_per_second": 6.503,
"eval_steps_per_second": 0.82,
"eval_wer": 1.0012503125781445,
"step": 1400
},
{
"epoch": 1.43,
"learning_rate": 1.9920000000000002e-05,
"loss": 3.8683,
"step": 1500
},
{
"epoch": 1.43,
"eval_loss": 3.8101820945739746,
"eval_runtime": 65.6088,
"eval_samples_per_second": 6.524,
"eval_steps_per_second": 0.823,
"eval_wer": 1.0,
"step": 1500
},
{
"epoch": 1.52,
"learning_rate": 2.1253333333333335e-05,
"loss": 3.8863,
"step": 1600
},
{
"epoch": 1.52,
"eval_loss": 3.795001268386841,
"eval_runtime": 65.0097,
"eval_samples_per_second": 6.584,
"eval_steps_per_second": 0.831,
"eval_wer": 1.0037509377344336,
"step": 1600
}
],
"logging_steps": 100,
"max_steps": 21040,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 200,
"total_flos": 3.1104086860681257e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}