{
  "best_metric": 0.6922819018363953,
  "best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-bsbigcgen-male-model/checkpoint-800",
  "epoch": 4.487961476725522,
  "eval_steps": 200,
  "global_step": 1400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08025682182985554,
      "grad_norm": 128.6523895263672,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 12.3604,
      "step": 25
    },
    {
      "epoch": 0.16051364365971107,
      "grad_norm": 105.59561920166016,
      "learning_rate": 9.000000000000001e-07,
      "loss": 10.9708,
      "step": 50
    },
    {
      "epoch": 0.24077046548956663,
      "grad_norm": 80.34696197509766,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 8.8039,
      "step": 75
    },
    {
      "epoch": 0.32102728731942215,
      "grad_norm": 82.3702621459961,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 6.3185,
      "step": 100
    },
    {
      "epoch": 0.4012841091492777,
      "grad_norm": 69.2629623413086,
      "learning_rate": 2.38e-06,
      "loss": 5.5448,
      "step": 125
    },
    {
      "epoch": 0.48154093097913325,
      "grad_norm": 57.369197845458984,
      "learning_rate": 2.88e-06,
      "loss": 4.7462,
      "step": 150
    },
    {
      "epoch": 0.5617977528089888,
      "grad_norm": 66.99210357666016,
      "learning_rate": 3.3800000000000007e-06,
      "loss": 4.7684,
      "step": 175
    },
    {
      "epoch": 0.6420545746388443,
      "grad_norm": 68.48348236083984,
      "learning_rate": 3.88e-06,
      "loss": 4.039,
      "step": 200
    },
    {
      "epoch": 0.6420545746388443,
      "eval_loss": 0.9763619303703308,
      "eval_runtime": 251.081,
      "eval_samples_per_second": 1.713,
      "eval_steps_per_second": 0.856,
      "eval_wer": 0.694201564657156,
      "step": 200
    },
    {
      "epoch": 0.7223113964686998,
      "grad_norm": 73.68655395507812,
      "learning_rate": 4.38e-06,
      "loss": 4.0338,
      "step": 225
    },
    {
      "epoch": 0.8025682182985554,
      "grad_norm": 69.01504516601562,
      "learning_rate": 4.880000000000001e-06,
      "loss": 3.9039,
      "step": 250
    },
    {
      "epoch": 0.8828250401284109,
      "grad_norm": 60.503868103027344,
      "learning_rate": 5.380000000000001e-06,
      "loss": 3.761,
      "step": 275
    },
    {
      "epoch": 0.9630818619582665,
      "grad_norm": 53.41858673095703,
      "learning_rate": 5.8800000000000005e-06,
      "loss": 3.6084,
      "step": 300
    },
    {
      "epoch": 1.0417335473515248,
      "grad_norm": 68.72418975830078,
      "learning_rate": 6.380000000000001e-06,
      "loss": 2.9504,
      "step": 325
    },
    {
      "epoch": 1.1219903691813804,
      "grad_norm": 60.11503982543945,
      "learning_rate": 6.88e-06,
      "loss": 2.6441,
      "step": 350
    },
    {
      "epoch": 1.202247191011236,
      "grad_norm": 52.51095962524414,
      "learning_rate": 7.3800000000000005e-06,
      "loss": 2.7013,
      "step": 375
    },
    {
      "epoch": 1.2825040128410916,
      "grad_norm": 56.025001525878906,
      "learning_rate": 7.88e-06,
      "loss": 2.6473,
      "step": 400
    },
    {
      "epoch": 1.2825040128410916,
      "eval_loss": 0.7705269455909729,
      "eval_runtime": 246.2904,
      "eval_samples_per_second": 1.746,
      "eval_steps_per_second": 0.873,
      "eval_wer": 0.5812241141279337,
      "step": 400
    },
    {
      "epoch": 1.362760834670947,
      "grad_norm": 56.739906311035156,
      "learning_rate": 8.380000000000001e-06,
      "loss": 2.6946,
      "step": 425
    },
    {
      "epoch": 1.4430176565008026,
      "grad_norm": 40.198238372802734,
      "learning_rate": 8.880000000000001e-06,
      "loss": 2.5734,
      "step": 450
    },
    {
      "epoch": 1.523274478330658,
      "grad_norm": 53.963035583496094,
      "learning_rate": 9.38e-06,
      "loss": 2.5631,
      "step": 475
    },
    {
      "epoch": 1.6035313001605136,
      "grad_norm": 58.04458999633789,
      "learning_rate": 9.88e-06,
      "loss": 2.7175,
      "step": 500
    },
    {
      "epoch": 1.6837881219903692,
      "grad_norm": 76.02864074707031,
      "learning_rate": 9.978482446206116e-06,
      "loss": 2.9013,
      "step": 525
    },
    {
      "epoch": 1.7640449438202248,
      "grad_norm": 55.0551872253418,
      "learning_rate": 9.950169875424689e-06,
      "loss": 2.7567,
      "step": 550
    },
    {
      "epoch": 1.8443017656500804,
      "grad_norm": 45.51343536376953,
      "learning_rate": 9.921857304643261e-06,
      "loss": 2.5122,
      "step": 575
    },
    {
      "epoch": 1.9245585874799358,
      "grad_norm": 45.42640686035156,
      "learning_rate": 9.893544733861835e-06,
      "loss": 2.3913,
      "step": 600
    },
    {
      "epoch": 1.9245585874799358,
      "eval_loss": 0.700962483882904,
      "eval_runtime": 243.9773,
      "eval_samples_per_second": 1.762,
      "eval_steps_per_second": 0.881,
      "eval_wer": 0.5269213069489186,
      "step": 600
    },
    {
      "epoch": 2.0032102728731944,
      "grad_norm": 30.623844146728516,
      "learning_rate": 9.865232163080408e-06,
      "loss": 2.4991,
      "step": 625
    },
    {
      "epoch": 2.0834670947030496,
      "grad_norm": 52.41933059692383,
      "learning_rate": 9.836919592298982e-06,
      "loss": 1.1913,
      "step": 650
    },
    {
      "epoch": 2.163723916532905,
      "grad_norm": 40.88090896606445,
      "learning_rate": 9.808607021517554e-06,
      "loss": 1.4546,
      "step": 675
    },
    {
      "epoch": 2.243980738362761,
      "grad_norm": 38.167205810546875,
      "learning_rate": 9.780294450736127e-06,
      "loss": 1.4931,
      "step": 700
    },
    {
      "epoch": 2.3242375601926164,
      "grad_norm": 31.256465911865234,
      "learning_rate": 9.751981879954701e-06,
      "loss": 1.4755,
      "step": 725
    },
    {
      "epoch": 2.404494382022472,
      "grad_norm": 46.689884185791016,
      "learning_rate": 9.723669309173273e-06,
      "loss": 1.4373,
      "step": 750
    },
    {
      "epoch": 2.4847512038523276,
      "grad_norm": 50.231746673583984,
      "learning_rate": 9.695356738391848e-06,
      "loss": 1.5603,
      "step": 775
    },
    {
      "epoch": 2.5650080256821832,
      "grad_norm": 49.86467742919922,
      "learning_rate": 9.66704416761042e-06,
      "loss": 1.6283,
      "step": 800
    },
    {
      "epoch": 2.5650080256821832,
      "eval_loss": 0.6922819018363953,
      "eval_runtime": 248.6409,
      "eval_samples_per_second": 1.729,
      "eval_steps_per_second": 0.865,
      "eval_wer": 0.5177174413253567,
      "step": 800
    },
    {
      "epoch": 2.6452648475120384,
      "grad_norm": 43.58526611328125,
      "learning_rate": 9.638731596828992e-06,
      "loss": 1.717,
      "step": 825
    },
    {
      "epoch": 2.725521669341894,
      "grad_norm": 34.9052619934082,
      "learning_rate": 9.610419026047567e-06,
      "loss": 1.6415,
      "step": 850
    },
    {
      "epoch": 2.8057784911717496,
      "grad_norm": 45.21881103515625,
      "learning_rate": 9.582106455266139e-06,
      "loss": 1.4356,
      "step": 875
    },
    {
      "epoch": 2.886035313001605,
      "grad_norm": 57.4312629699707,
      "learning_rate": 9.553793884484713e-06,
      "loss": 1.536,
      "step": 900
    },
    {
      "epoch": 2.966292134831461,
      "grad_norm": 42.61119079589844,
      "learning_rate": 9.525481313703286e-06,
      "loss": 1.5945,
      "step": 925
    },
    {
      "epoch": 3.044943820224719,
      "grad_norm": 22.571409225463867,
      "learning_rate": 9.497168742921858e-06,
      "loss": 0.9745,
      "step": 950
    },
    {
      "epoch": 3.125200642054575,
      "grad_norm": 26.83526611328125,
      "learning_rate": 9.468856172140432e-06,
      "loss": 0.7011,
      "step": 975
    },
    {
      "epoch": 3.20545746388443,
      "grad_norm": 32.36039733886719,
      "learning_rate": 9.440543601359004e-06,
      "loss": 0.7783,
      "step": 1000
    },
    {
      "epoch": 3.20545746388443,
      "eval_loss": 0.7241026163101196,
      "eval_runtime": 248.287,
      "eval_samples_per_second": 1.732,
      "eval_steps_per_second": 0.866,
      "eval_wer": 0.5131155085135757,
      "step": 1000
    },
    {
      "epoch": 3.2857142857142856,
      "grad_norm": 42.650146484375,
      "learning_rate": 9.412231030577577e-06,
      "loss": 0.6489,
      "step": 1025
    },
    {
      "epoch": 3.365971107544141,
      "grad_norm": 29.87259864807129,
      "learning_rate": 9.383918459796151e-06,
      "loss": 0.9697,
      "step": 1050
    },
    {
      "epoch": 3.446227929373997,
      "grad_norm": 32.14972686767578,
      "learning_rate": 9.355605889014723e-06,
      "loss": 0.7128,
      "step": 1075
    },
    {
      "epoch": 3.5264847512038524,
      "grad_norm": 40.77785110473633,
      "learning_rate": 9.327293318233296e-06,
      "loss": 0.8084,
      "step": 1100
    },
    {
      "epoch": 3.606741573033708,
      "grad_norm": 39.83961486816406,
      "learning_rate": 9.298980747451868e-06,
      "loss": 0.8268,
      "step": 1125
    },
    {
      "epoch": 3.686998394863563,
      "grad_norm": 34.79006576538086,
      "learning_rate": 9.270668176670442e-06,
      "loss": 0.6832,
      "step": 1150
    },
    {
      "epoch": 3.767255216693419,
      "grad_norm": 24.103979110717773,
      "learning_rate": 9.242355605889015e-06,
      "loss": 0.7754,
      "step": 1175
    },
    {
      "epoch": 3.8475120385232744,
      "grad_norm": 42.03251647949219,
      "learning_rate": 9.214043035107589e-06,
      "loss": 0.8723,
      "step": 1200
    },
    {
      "epoch": 3.8475120385232744,
      "eval_loss": 0.720678985118866,
      "eval_runtime": 245.5357,
      "eval_samples_per_second": 1.751,
      "eval_steps_per_second": 0.876,
      "eval_wer": 0.5140358950759318,
      "step": 1200
    },
    {
      "epoch": 3.92776886035313,
      "grad_norm": 27.95992088317871,
      "learning_rate": 9.185730464326161e-06,
      "loss": 0.8405,
      "step": 1225
    },
    {
      "epoch": 4.006420545746389,
      "grad_norm": 15.731476783752441,
      "learning_rate": 9.157417893544734e-06,
      "loss": 0.6524,
      "step": 1250
    },
    {
      "epoch": 4.086677367576244,
      "grad_norm": 15.728172302246094,
      "learning_rate": 9.129105322763308e-06,
      "loss": 0.3319,
      "step": 1275
    },
    {
      "epoch": 4.166934189406099,
      "grad_norm": 30.88848876953125,
      "learning_rate": 9.10079275198188e-06,
      "loss": 0.4029,
      "step": 1300
    },
    {
      "epoch": 4.247191011235955,
      "grad_norm": 20.073183059692383,
      "learning_rate": 9.072480181200455e-06,
      "loss": 0.4016,
      "step": 1325
    },
    {
      "epoch": 4.32744783306581,
      "grad_norm": 21.41939353942871,
      "learning_rate": 9.044167610419027e-06,
      "loss": 0.3585,
      "step": 1350
    },
    {
      "epoch": 4.407704654895666,
      "grad_norm": 25.81800651550293,
      "learning_rate": 9.0158550396376e-06,
      "loss": 0.3965,
      "step": 1375
    },
    {
      "epoch": 4.487961476725522,
      "grad_norm": 21.839908599853516,
      "learning_rate": 8.987542468856174e-06,
      "loss": 0.3633,
      "step": 1400
    },
    {
      "epoch": 4.487961476725522,
      "eval_loss": 0.7589691281318665,
      "eval_runtime": 247.0866,
      "eval_samples_per_second": 1.74,
      "eval_steps_per_second": 0.87,
      "eval_wer": 0.49608835710998617,
      "step": 1400
    },
    {
      "epoch": 4.487961476725522,
      "step": 1400,
      "total_flos": 1.14103628660736e+19,
      "train_loss": 2.465633350099836,
      "train_runtime": 4460.9347,
      "train_samples_per_second": 16.752,
      "train_steps_per_second": 2.091
    }
  ],
  "logging_steps": 25,
  "max_steps": 9330,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 200,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.14103628660736e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}