Davidsamuel101's picture
Added w2v2 with LM
e1d9525
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 9805,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.25497195308516063,
"grad_norm": 1.5315192937850952,
"learning_rate": 0.00010193679918450561,
"loss": 3.8945,
"step": 500
},
{
"epoch": 0.5099439061703213,
"grad_norm": 1.6545052528381348,
"learning_rate": 0.00019956935630099728,
"loss": 0.5228,
"step": 1000
},
{
"epoch": 0.7649158592554819,
"grad_norm": 1.1093895435333252,
"learning_rate": 0.0001882366273798731,
"loss": 0.3702,
"step": 1500
},
{
"epoch": 1.0,
"eval_cer": 0.13674156347728234,
"eval_loss": 0.287818968296051,
"eval_runtime": 242.9432,
"eval_samples_per_second": 92.038,
"eval_steps_per_second": 11.505,
"eval_wer": 0.3335439517042918,
"step": 1961
},
{
"epoch": 1.0198878123406425,
"grad_norm": 0.6819602847099304,
"learning_rate": 0.0001769038984587489,
"loss": 0.3114,
"step": 2000
},
{
"epoch": 1.2748597654258031,
"grad_norm": 0.7735158801078796,
"learning_rate": 0.00016557116953762465,
"loss": 0.2571,
"step": 2500
},
{
"epoch": 1.5298317185109638,
"grad_norm": 0.7301546931266785,
"learning_rate": 0.00015423844061650046,
"loss": 0.2411,
"step": 3000
},
{
"epoch": 1.7848036715961244,
"grad_norm": 0.5918829441070557,
"learning_rate": 0.00014290571169537624,
"loss": 0.2333,
"step": 3500
},
{
"epoch": 2.0,
"eval_cer": 0.12185579510710033,
"eval_loss": 0.23235103487968445,
"eval_runtime": 242.1687,
"eval_samples_per_second": 92.332,
"eval_steps_per_second": 11.542,
"eval_wer": 0.26528299554267426,
"step": 3922
},
{
"epoch": 2.039775624681285,
"grad_norm": 0.7791718244552612,
"learning_rate": 0.00013157298277425205,
"loss": 0.2157,
"step": 4000
},
{
"epoch": 2.2947475777664454,
"grad_norm": 0.5816757082939148,
"learning_rate": 0.00012024025385312785,
"loss": 0.1817,
"step": 4500
},
{
"epoch": 2.5497195308516063,
"grad_norm": 0.42094656825065613,
"learning_rate": 0.00010890752493200362,
"loss": 0.1803,
"step": 5000
},
{
"epoch": 2.804691483936767,
"grad_norm": 0.515943706035614,
"learning_rate": 9.757479601087942e-05,
"loss": 0.172,
"step": 5500
},
{
"epoch": 3.0,
"eval_cer": 0.11621454523249779,
"eval_loss": 0.213576540350914,
"eval_runtime": 242.2848,
"eval_samples_per_second": 92.288,
"eval_steps_per_second": 11.536,
"eval_wer": 0.246385720074449,
"step": 5883
},
{
"epoch": 3.0596634370219276,
"grad_norm": 0.397499680519104,
"learning_rate": 8.624206708975522e-05,
"loss": 0.1656,
"step": 6000
},
{
"epoch": 3.3146353901070884,
"grad_norm": 0.5948652625083923,
"learning_rate": 7.490933816863101e-05,
"loss": 0.1388,
"step": 6500
},
{
"epoch": 3.569607343192249,
"grad_norm": 0.5423580408096313,
"learning_rate": 6.35766092475068e-05,
"loss": 0.1383,
"step": 7000
},
{
"epoch": 3.8245792962774097,
"grad_norm": 1.2003000974655151,
"learning_rate": 5.22438803263826e-05,
"loss": 0.1331,
"step": 7500
},
{
"epoch": 4.0,
"eval_cer": 0.11265216708611207,
"eval_loss": 0.20429827272891998,
"eval_runtime": 242.3631,
"eval_samples_per_second": 92.258,
"eval_steps_per_second": 11.532,
"eval_wer": 0.22865256897179317,
"step": 7844
},
{
"epoch": 4.07955124936257,
"grad_norm": 0.8045985102653503,
"learning_rate": 4.091115140525839e-05,
"loss": 0.123,
"step": 8000
},
{
"epoch": 4.3345232024477305,
"grad_norm": 0.402927964925766,
"learning_rate": 2.957842248413418e-05,
"loss": 0.1052,
"step": 8500
},
{
"epoch": 4.589495155532891,
"grad_norm": 0.42731374502182007,
"learning_rate": 1.8245693563009974e-05,
"loss": 0.1054,
"step": 9000
},
{
"epoch": 4.844467108618052,
"grad_norm": 0.2811175286769867,
"learning_rate": 6.912964641885767e-06,
"loss": 0.1018,
"step": 9500
},
{
"epoch": 5.0,
"eval_cer": 0.10983672160892569,
"eval_loss": 0.2057330459356308,
"eval_runtime": 270.0068,
"eval_samples_per_second": 82.813,
"eval_steps_per_second": 10.352,
"eval_wer": 0.21943229191706215,
"step": 9805
},
{
"epoch": 5.0,
"step": 9805,
"total_flos": 5.6911698946882765e+19,
"train_loss": 0.39030011520891517,
"train_runtime": 4855.0155,
"train_samples_per_second": 64.613,
"train_steps_per_second": 2.02
}
],
"logging_steps": 500,
"max_steps": 9805,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.6911698946882765e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}