{
"best_metric": 0.6922819018363953,
"best_model_checkpoint": "/scratch/skscla001/speech/results/whisper-medium-bsbigcgen-male-model/checkpoint-800",
"epoch": 4.487961476725522,
"eval_steps": 200,
"global_step": 1400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08025682182985554,
"grad_norm": 128.6523895263672,
"learning_rate": 4.0000000000000003e-07,
"loss": 12.3604,
"step": 25
},
{
"epoch": 0.16051364365971107,
"grad_norm": 105.59561920166016,
"learning_rate": 9.000000000000001e-07,
"loss": 10.9708,
"step": 50
},
{
"epoch": 0.24077046548956663,
"grad_norm": 80.34696197509766,
"learning_rate": 1.4000000000000001e-06,
"loss": 8.8039,
"step": 75
},
{
"epoch": 0.32102728731942215,
"grad_norm": 82.3702621459961,
"learning_rate": 1.9000000000000002e-06,
"loss": 6.3185,
"step": 100
},
{
"epoch": 0.4012841091492777,
"grad_norm": 69.2629623413086,
"learning_rate": 2.38e-06,
"loss": 5.5448,
"step": 125
},
{
"epoch": 0.48154093097913325,
"grad_norm": 57.369197845458984,
"learning_rate": 2.88e-06,
"loss": 4.7462,
"step": 150
},
{
"epoch": 0.5617977528089888,
"grad_norm": 66.99210357666016,
"learning_rate": 3.3800000000000007e-06,
"loss": 4.7684,
"step": 175
},
{
"epoch": 0.6420545746388443,
"grad_norm": 68.48348236083984,
"learning_rate": 3.88e-06,
"loss": 4.039,
"step": 200
},
{
"epoch": 0.6420545746388443,
"eval_loss": 0.9763619303703308,
"eval_runtime": 251.081,
"eval_samples_per_second": 1.713,
"eval_steps_per_second": 0.856,
"eval_wer": 0.694201564657156,
"step": 200
},
{
"epoch": 0.7223113964686998,
"grad_norm": 73.68655395507812,
"learning_rate": 4.38e-06,
"loss": 4.0338,
"step": 225
},
{
"epoch": 0.8025682182985554,
"grad_norm": 69.01504516601562,
"learning_rate": 4.880000000000001e-06,
"loss": 3.9039,
"step": 250
},
{
"epoch": 0.8828250401284109,
"grad_norm": 60.503868103027344,
"learning_rate": 5.380000000000001e-06,
"loss": 3.761,
"step": 275
},
{
"epoch": 0.9630818619582665,
"grad_norm": 53.41858673095703,
"learning_rate": 5.8800000000000005e-06,
"loss": 3.6084,
"step": 300
},
{
"epoch": 1.0417335473515248,
"grad_norm": 68.72418975830078,
"learning_rate": 6.380000000000001e-06,
"loss": 2.9504,
"step": 325
},
{
"epoch": 1.1219903691813804,
"grad_norm": 60.11503982543945,
"learning_rate": 6.88e-06,
"loss": 2.6441,
"step": 350
},
{
"epoch": 1.202247191011236,
"grad_norm": 52.51095962524414,
"learning_rate": 7.3800000000000005e-06,
"loss": 2.7013,
"step": 375
},
{
"epoch": 1.2825040128410916,
"grad_norm": 56.025001525878906,
"learning_rate": 7.88e-06,
"loss": 2.6473,
"step": 400
},
{
"epoch": 1.2825040128410916,
"eval_loss": 0.7705269455909729,
"eval_runtime": 246.2904,
"eval_samples_per_second": 1.746,
"eval_steps_per_second": 0.873,
"eval_wer": 0.5812241141279337,
"step": 400
},
{
"epoch": 1.362760834670947,
"grad_norm": 56.739906311035156,
"learning_rate": 8.380000000000001e-06,
"loss": 2.6946,
"step": 425
},
{
"epoch": 1.4430176565008026,
"grad_norm": 40.198238372802734,
"learning_rate": 8.880000000000001e-06,
"loss": 2.5734,
"step": 450
},
{
"epoch": 1.523274478330658,
"grad_norm": 53.963035583496094,
"learning_rate": 9.38e-06,
"loss": 2.5631,
"step": 475
},
{
"epoch": 1.6035313001605136,
"grad_norm": 58.04458999633789,
"learning_rate": 9.88e-06,
"loss": 2.7175,
"step": 500
},
{
"epoch": 1.6837881219903692,
"grad_norm": 76.02864074707031,
"learning_rate": 9.978482446206116e-06,
"loss": 2.9013,
"step": 525
},
{
"epoch": 1.7640449438202248,
"grad_norm": 55.0551872253418,
"learning_rate": 9.950169875424689e-06,
"loss": 2.7567,
"step": 550
},
{
"epoch": 1.8443017656500804,
"grad_norm": 45.51343536376953,
"learning_rate": 9.921857304643261e-06,
"loss": 2.5122,
"step": 575
},
{
"epoch": 1.9245585874799358,
"grad_norm": 45.42640686035156,
"learning_rate": 9.893544733861835e-06,
"loss": 2.3913,
"step": 600
},
{
"epoch": 1.9245585874799358,
"eval_loss": 0.700962483882904,
"eval_runtime": 243.9773,
"eval_samples_per_second": 1.762,
"eval_steps_per_second": 0.881,
"eval_wer": 0.5269213069489186,
"step": 600
},
{
"epoch": 2.0032102728731944,
"grad_norm": 30.623844146728516,
"learning_rate": 9.865232163080408e-06,
"loss": 2.4991,
"step": 625
},
{
"epoch": 2.0834670947030496,
"grad_norm": 52.41933059692383,
"learning_rate": 9.836919592298982e-06,
"loss": 1.1913,
"step": 650
},
{
"epoch": 2.163723916532905,
"grad_norm": 40.88090896606445,
"learning_rate": 9.808607021517554e-06,
"loss": 1.4546,
"step": 675
},
{
"epoch": 2.243980738362761,
"grad_norm": 38.167205810546875,
"learning_rate": 9.780294450736127e-06,
"loss": 1.4931,
"step": 700
},
{
"epoch": 2.3242375601926164,
"grad_norm": 31.256465911865234,
"learning_rate": 9.751981879954701e-06,
"loss": 1.4755,
"step": 725
},
{
"epoch": 2.404494382022472,
"grad_norm": 46.689884185791016,
"learning_rate": 9.723669309173273e-06,
"loss": 1.4373,
"step": 750
},
{
"epoch": 2.4847512038523276,
"grad_norm": 50.231746673583984,
"learning_rate": 9.695356738391848e-06,
"loss": 1.5603,
"step": 775
},
{
"epoch": 2.5650080256821832,
"grad_norm": 49.86467742919922,
"learning_rate": 9.66704416761042e-06,
"loss": 1.6283,
"step": 800
},
{
"epoch": 2.5650080256821832,
"eval_loss": 0.6922819018363953,
"eval_runtime": 248.6409,
"eval_samples_per_second": 1.729,
"eval_steps_per_second": 0.865,
"eval_wer": 0.5177174413253567,
"step": 800
},
{
"epoch": 2.6452648475120384,
"grad_norm": 43.58526611328125,
"learning_rate": 9.638731596828992e-06,
"loss": 1.717,
"step": 825
},
{
"epoch": 2.725521669341894,
"grad_norm": 34.9052619934082,
"learning_rate": 9.610419026047567e-06,
"loss": 1.6415,
"step": 850
},
{
"epoch": 2.8057784911717496,
"grad_norm": 45.21881103515625,
"learning_rate": 9.582106455266139e-06,
"loss": 1.4356,
"step": 875
},
{
"epoch": 2.886035313001605,
"grad_norm": 57.4312629699707,
"learning_rate": 9.553793884484713e-06,
"loss": 1.536,
"step": 900
},
{
"epoch": 2.966292134831461,
"grad_norm": 42.61119079589844,
"learning_rate": 9.525481313703286e-06,
"loss": 1.5945,
"step": 925
},
{
"epoch": 3.044943820224719,
"grad_norm": 22.571409225463867,
"learning_rate": 9.497168742921858e-06,
"loss": 0.9745,
"step": 950
},
{
"epoch": 3.125200642054575,
"grad_norm": 26.83526611328125,
"learning_rate": 9.468856172140432e-06,
"loss": 0.7011,
"step": 975
},
{
"epoch": 3.20545746388443,
"grad_norm": 32.36039733886719,
"learning_rate": 9.440543601359004e-06,
"loss": 0.7783,
"step": 1000
},
{
"epoch": 3.20545746388443,
"eval_loss": 0.7241026163101196,
"eval_runtime": 248.287,
"eval_samples_per_second": 1.732,
"eval_steps_per_second": 0.866,
"eval_wer": 0.5131155085135757,
"step": 1000
},
{
"epoch": 3.2857142857142856,
"grad_norm": 42.650146484375,
"learning_rate": 9.412231030577577e-06,
"loss": 0.6489,
"step": 1025
},
{
"epoch": 3.365971107544141,
"grad_norm": 29.87259864807129,
"learning_rate": 9.383918459796151e-06,
"loss": 0.9697,
"step": 1050
},
{
"epoch": 3.446227929373997,
"grad_norm": 32.14972686767578,
"learning_rate": 9.355605889014723e-06,
"loss": 0.7128,
"step": 1075
},
{
"epoch": 3.5264847512038524,
"grad_norm": 40.77785110473633,
"learning_rate": 9.327293318233296e-06,
"loss": 0.8084,
"step": 1100
},
{
"epoch": 3.606741573033708,
"grad_norm": 39.83961486816406,
"learning_rate": 9.298980747451868e-06,
"loss": 0.8268,
"step": 1125
},
{
"epoch": 3.686998394863563,
"grad_norm": 34.79006576538086,
"learning_rate": 9.270668176670442e-06,
"loss": 0.6832,
"step": 1150
},
{
"epoch": 3.767255216693419,
"grad_norm": 24.103979110717773,
"learning_rate": 9.242355605889015e-06,
"loss": 0.7754,
"step": 1175
},
{
"epoch": 3.8475120385232744,
"grad_norm": 42.03251647949219,
"learning_rate": 9.214043035107589e-06,
"loss": 0.8723,
"step": 1200
},
{
"epoch": 3.8475120385232744,
"eval_loss": 0.720678985118866,
"eval_runtime": 245.5357,
"eval_samples_per_second": 1.751,
"eval_steps_per_second": 0.876,
"eval_wer": 0.5140358950759318,
"step": 1200
},
{
"epoch": 3.92776886035313,
"grad_norm": 27.95992088317871,
"learning_rate": 9.185730464326161e-06,
"loss": 0.8405,
"step": 1225
},
{
"epoch": 4.006420545746389,
"grad_norm": 15.731476783752441,
"learning_rate": 9.157417893544734e-06,
"loss": 0.6524,
"step": 1250
},
{
"epoch": 4.086677367576244,
"grad_norm": 15.728172302246094,
"learning_rate": 9.129105322763308e-06,
"loss": 0.3319,
"step": 1275
},
{
"epoch": 4.166934189406099,
"grad_norm": 30.88848876953125,
"learning_rate": 9.10079275198188e-06,
"loss": 0.4029,
"step": 1300
},
{
"epoch": 4.247191011235955,
"grad_norm": 20.073183059692383,
"learning_rate": 9.072480181200455e-06,
"loss": 0.4016,
"step": 1325
},
{
"epoch": 4.32744783306581,
"grad_norm": 21.41939353942871,
"learning_rate": 9.044167610419027e-06,
"loss": 0.3585,
"step": 1350
},
{
"epoch": 4.407704654895666,
"grad_norm": 25.81800651550293,
"learning_rate": 9.0158550396376e-06,
"loss": 0.3965,
"step": 1375
},
{
"epoch": 4.487961476725522,
"grad_norm": 21.839908599853516,
"learning_rate": 8.987542468856174e-06,
"loss": 0.3633,
"step": 1400
},
{
"epoch": 4.487961476725522,
"eval_loss": 0.7589691281318665,
"eval_runtime": 247.0866,
"eval_samples_per_second": 1.74,
"eval_steps_per_second": 0.87,
"eval_wer": 0.49608835710998617,
"step": 1400
},
{
"epoch": 4.487961476725522,
"step": 1400,
"total_flos": 1.14103628660736e+19,
"train_loss": 2.465633350099836,
"train_runtime": 4460.9347,
"train_samples_per_second": 16.752,
"train_steps_per_second": 2.091
}
],
"logging_steps": 25,
"max_steps": 9330,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.14103628660736e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}