{
"best_metric": 0.6922819018363953,
"best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-bsbigcgen-male-model/checkpoint-800",
"epoch": 4.487961476725522,
"eval_steps": 200,
"global_step": 1400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08025682182985554,
"grad_norm": 128.6523895263672,
"learning_rate": 4.0000000000000003e-07,
"loss": 12.3604,
"step": 25
},
{
"epoch": 0.16051364365971107,
"grad_norm": 105.59561920166016,
"learning_rate": 9.000000000000001e-07,
"loss": 10.9708,
"step": 50
},
{
"epoch": 0.24077046548956663,
"grad_norm": 80.34696197509766,
"learning_rate": 1.4000000000000001e-06,
"loss": 8.8039,
"step": 75
},
{
"epoch": 0.32102728731942215,
"grad_norm": 82.3702621459961,
"learning_rate": 1.9000000000000002e-06,
"loss": 6.3185,
"step": 100
},
{
"epoch": 0.4012841091492777,
"grad_norm": 69.2629623413086,
"learning_rate": 2.38e-06,
"loss": 5.5448,
"step": 125
},
{
"epoch": 0.48154093097913325,
"grad_norm": 57.369197845458984,
"learning_rate": 2.88e-06,
"loss": 4.7462,
"step": 150
},
{
"epoch": 0.5617977528089888,
"grad_norm": 66.99210357666016,
"learning_rate": 3.3800000000000007e-06,
"loss": 4.7684,
"step": 175
},
{
"epoch": 0.6420545746388443,
"grad_norm": 68.48348236083984,
"learning_rate": 3.88e-06,
"loss": 4.039,
"step": 200
},
{
"epoch": 0.6420545746388443,
"eval_loss": 0.9763619303703308,
"eval_runtime": 251.081,
"eval_samples_per_second": 1.713,
"eval_steps_per_second": 0.856,
"eval_wer": 0.694201564657156,
"step": 200
},
{
"epoch": 0.7223113964686998,
"grad_norm": 73.68655395507812,
"learning_rate": 4.38e-06,
"loss": 4.0338,
"step": 225
},
{
"epoch": 0.8025682182985554,
"grad_norm": 69.01504516601562,
"learning_rate": 4.880000000000001e-06,
"loss": 3.9039,
"step": 250
},
{
"epoch": 0.8828250401284109,
"grad_norm": 60.503868103027344,
"learning_rate": 5.380000000000001e-06,
"loss": 3.761,
"step": 275
},
{
"epoch": 0.9630818619582665,
"grad_norm": 53.41858673095703,
"learning_rate": 5.8800000000000005e-06,
"loss": 3.6084,
"step": 300
},
{
"epoch": 1.0417335473515248,
"grad_norm": 68.72418975830078,
"learning_rate": 6.380000000000001e-06,
"loss": 2.9504,
"step": 325
},
{
"epoch": 1.1219903691813804,
"grad_norm": 60.11503982543945,
"learning_rate": 6.88e-06,
"loss": 2.6441,
"step": 350
},
{
"epoch": 1.202247191011236,
"grad_norm": 52.51095962524414,
"learning_rate": 7.3800000000000005e-06,
"loss": 2.7013,
"step": 375
},
{
"epoch": 1.2825040128410916,
"grad_norm": 56.025001525878906,
"learning_rate": 7.88e-06,
"loss": 2.6473,
"step": 400
},
{
"epoch": 1.2825040128410916,
"eval_loss": 0.7705269455909729,
"eval_runtime": 246.2904,
"eval_samples_per_second": 1.746,
"eval_steps_per_second": 0.873,
"eval_wer": 0.5812241141279337,
"step": 400
},
{
"epoch": 1.362760834670947,
"grad_norm": 56.739906311035156,
"learning_rate": 8.380000000000001e-06,
"loss": 2.6946,
"step": 425
},
{
"epoch": 1.4430176565008026,
"grad_norm": 40.198238372802734,
"learning_rate": 8.880000000000001e-06,
"loss": 2.5734,
"step": 450
},
{
"epoch": 1.523274478330658,
"grad_norm": 53.963035583496094,
"learning_rate": 9.38e-06,
"loss": 2.5631,
"step": 475
},
{
"epoch": 1.6035313001605136,
"grad_norm": 58.04458999633789,
"learning_rate": 9.88e-06,
"loss": 2.7175,
"step": 500
},
{
"epoch": 1.6837881219903692,
"grad_norm": 76.02864074707031,
"learning_rate": 9.978482446206116e-06,
"loss": 2.9013,
"step": 525
},
{
"epoch": 1.7640449438202248,
"grad_norm": 55.0551872253418,
"learning_rate": 9.950169875424689e-06,
"loss": 2.7567,
"step": 550
},
{
"epoch": 1.8443017656500804,
"grad_norm": 45.51343536376953,
"learning_rate": 9.921857304643261e-06,
"loss": 2.5122,
"step": 575
},
{
"epoch": 1.9245585874799358,
"grad_norm": 45.42640686035156,
"learning_rate": 9.893544733861835e-06,
"loss": 2.3913,
"step": 600
},
{
"epoch": 1.9245585874799358,
"eval_loss": 0.700962483882904,
"eval_runtime": 243.9773,
"eval_samples_per_second": 1.762,
"eval_steps_per_second": 0.881,
"eval_wer": 0.5269213069489186,
"step": 600
},
{
"epoch": 2.0032102728731944,
"grad_norm": 30.623844146728516,
"learning_rate": 9.865232163080408e-06,
"loss": 2.4991,
"step": 625
},
{
"epoch": 2.0834670947030496,
"grad_norm": 52.41933059692383,
"learning_rate": 9.836919592298982e-06,
"loss": 1.1913,
"step": 650
},
{
"epoch": 2.163723916532905,
"grad_norm": 40.88090896606445,
"learning_rate": 9.808607021517554e-06,
"loss": 1.4546,
"step": 675
},
{
"epoch": 2.243980738362761,
"grad_norm": 38.167205810546875,
"learning_rate": 9.780294450736127e-06,
"loss": 1.4931,
"step": 700
},
{
"epoch": 2.3242375601926164,
"grad_norm": 31.256465911865234,
"learning_rate": 9.751981879954701e-06,
"loss": 1.4755,
"step": 725
},
{
"epoch": 2.404494382022472,
"grad_norm": 46.689884185791016,
"learning_rate": 9.723669309173273e-06,
"loss": 1.4373,
"step": 750
},
{
"epoch": 2.4847512038523276,
"grad_norm": 50.231746673583984,
"learning_rate": 9.695356738391848e-06,
"loss": 1.5603,
"step": 775
},
{
"epoch": 2.5650080256821832,
"grad_norm": 49.86467742919922,
"learning_rate": 9.66704416761042e-06,
"loss": 1.6283,
"step": 800
},
{
"epoch": 2.5650080256821832,
"eval_loss": 0.6922819018363953,
"eval_runtime": 248.6409,
"eval_samples_per_second": 1.729,
"eval_steps_per_second": 0.865,
"eval_wer": 0.5177174413253567,
"step": 800
},
{
"epoch": 2.6452648475120384,
"grad_norm": 43.58526611328125,
"learning_rate": 9.638731596828992e-06,
"loss": 1.717,
"step": 825
},
{
"epoch": 2.725521669341894,
"grad_norm": 34.9052619934082,
"learning_rate": 9.610419026047567e-06,
"loss": 1.6415,
"step": 850
},
{
"epoch": 2.8057784911717496,
"grad_norm": 45.21881103515625,
"learning_rate": 9.582106455266139e-06,
"loss": 1.4356,
"step": 875
},
{
"epoch": 2.886035313001605,
"grad_norm": 57.4312629699707,
"learning_rate": 9.553793884484713e-06,
"loss": 1.536,
"step": 900
},
{
"epoch": 2.966292134831461,
"grad_norm": 42.61119079589844,
"learning_rate": 9.525481313703286e-06,
"loss": 1.5945,
"step": 925
},
{
"epoch": 3.044943820224719,
"grad_norm": 22.571409225463867,
"learning_rate": 9.497168742921858e-06,
"loss": 0.9745,
"step": 950
},
{
"epoch": 3.125200642054575,
"grad_norm": 26.83526611328125,
"learning_rate": 9.468856172140432e-06,
"loss": 0.7011,
"step": 975
},
{
"epoch": 3.20545746388443,
"grad_norm": 32.36039733886719,
"learning_rate": 9.440543601359004e-06,
"loss": 0.7783,
"step": 1000
},
{
"epoch": 3.20545746388443,
"eval_loss": 0.7241026163101196,
"eval_runtime": 248.287,
"eval_samples_per_second": 1.732,
"eval_steps_per_second": 0.866,
"eval_wer": 0.5131155085135757,
"step": 1000
},
{
"epoch": 3.2857142857142856,
"grad_norm": 42.650146484375,
"learning_rate": 9.412231030577577e-06,
"loss": 0.6489,
"step": 1025
},
{
"epoch": 3.365971107544141,
"grad_norm": 29.87259864807129,
"learning_rate": 9.383918459796151e-06,
"loss": 0.9697,
"step": 1050
},
{
"epoch": 3.446227929373997,
"grad_norm": 32.14972686767578,
"learning_rate": 9.355605889014723e-06,
"loss": 0.7128,
"step": 1075
},
{
"epoch": 3.5264847512038524,
"grad_norm": 40.77785110473633,
"learning_rate": 9.327293318233296e-06,
"loss": 0.8084,
"step": 1100
},
{
"epoch": 3.606741573033708,
"grad_norm": 39.83961486816406,
"learning_rate": 9.298980747451868e-06,
"loss": 0.8268,
"step": 1125
},
{
"epoch": 3.686998394863563,
"grad_norm": 34.79006576538086,
"learning_rate": 9.270668176670442e-06,
"loss": 0.6832,
"step": 1150
},
{
"epoch": 3.767255216693419,
"grad_norm": 24.103979110717773,
"learning_rate": 9.242355605889015e-06,
"loss": 0.7754,
"step": 1175
},
{
"epoch": 3.8475120385232744,
"grad_norm": 42.03251647949219,
"learning_rate": 9.214043035107589e-06,
"loss": 0.8723,
"step": 1200
},
{
"epoch": 3.8475120385232744,
"eval_loss": 0.720678985118866,
"eval_runtime": 245.5357,
"eval_samples_per_second": 1.751,
"eval_steps_per_second": 0.876,
"eval_wer": 0.5140358950759318,
"step": 1200
},
{
"epoch": 3.92776886035313,
"grad_norm": 27.95992088317871,
"learning_rate": 9.185730464326161e-06,
"loss": 0.8405,
"step": 1225
},
{
"epoch": 4.006420545746389,
"grad_norm": 15.731476783752441,
"learning_rate": 9.157417893544734e-06,
"loss": 0.6524,
"step": 1250
},
{
"epoch": 4.086677367576244,
"grad_norm": 15.728172302246094,
"learning_rate": 9.129105322763308e-06,
"loss": 0.3319,
"step": 1275
},
{
"epoch": 4.166934189406099,
"grad_norm": 30.88848876953125,
"learning_rate": 9.10079275198188e-06,
"loss": 0.4029,
"step": 1300
},
{
"epoch": 4.247191011235955,
"grad_norm": 20.073183059692383,
"learning_rate": 9.072480181200455e-06,
"loss": 0.4016,
"step": 1325
},
{
"epoch": 4.32744783306581,
"grad_norm": 21.41939353942871,
"learning_rate": 9.044167610419027e-06,
"loss": 0.3585,
"step": 1350
},
{
"epoch": 4.407704654895666,
"grad_norm": 25.81800651550293,
"learning_rate": 9.0158550396376e-06,
"loss": 0.3965,
"step": 1375
},
{
"epoch": 4.487961476725522,
"grad_norm": 21.839908599853516,
"learning_rate": 8.987542468856174e-06,
"loss": 0.3633,
"step": 1400
},
{
"epoch": 4.487961476725522,
"eval_loss": 0.7589691281318665,
"eval_runtime": 247.0866,
"eval_samples_per_second": 1.74,
"eval_steps_per_second": 0.87,
"eval_wer": 0.49608835710998617,
"step": 1400
},
{
"epoch": 4.487961476725522,
"step": 1400,
"total_flos": 1.14103628660736e+19,
"train_loss": 2.465633350099836,
"train_runtime": 4460.9347,
"train_samples_per_second": 16.752,
"train_steps_per_second": 2.091
}
],
"logging_steps": 25,
"max_steps": 9330,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.14103628660736e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}