|
{ |
|
"best_global_step": 800, |
|
"best_metric": 0.6969724297523499, |
|
"best_model_checkpoint": "/scratch/skscla001/experiments/datasets/results/whisper-medium-bigcgen-baseline-42/checkpoint-800", |
|
"epoch": 4.878718535469107, |
|
"eval_steps": 200, |
|
"global_step": 1600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07627765064836003, |
|
"grad_norm": 32.69679260253906, |
|
"learning_rate": 4.2000000000000006e-07, |
|
"loss": 3.3916, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.15255530129672007, |
|
"grad_norm": 27.17233657836914, |
|
"learning_rate": 9.200000000000001e-07, |
|
"loss": 2.7163, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2288329519450801, |
|
"grad_norm": 22.44352912902832, |
|
"learning_rate": 1.42e-06, |
|
"loss": 2.1063, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.30511060259344014, |
|
"grad_norm": 17.82234764099121, |
|
"learning_rate": 1.9200000000000003e-06, |
|
"loss": 1.5258, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38138825324180015, |
|
"grad_norm": 16.569429397583008, |
|
"learning_rate": 2.42e-06, |
|
"loss": 1.2767, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.4576659038901602, |
|
"grad_norm": 18.720935821533203, |
|
"learning_rate": 2.9e-06, |
|
"loss": 1.1612, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.5339435545385202, |
|
"grad_norm": 18.467731475830078, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 1.0317, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.6102212051868803, |
|
"grad_norm": 9.183775901794434, |
|
"learning_rate": 3.900000000000001e-06, |
|
"loss": 1.0745, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6102212051868803, |
|
"eval_loss": 0.9366264939308167, |
|
"eval_runtime": 278.3426, |
|
"eval_samples_per_second": 1.638, |
|
"eval_steps_per_second": 0.819, |
|
"eval_wer": 0.6486166942539607, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6864988558352403, |
|
"grad_norm": 20.13412857055664, |
|
"learning_rate": 4.4e-06, |
|
"loss": 1.0986, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.7627765064836003, |
|
"grad_norm": 17.606948852539062, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 0.8614, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.8390541571319603, |
|
"grad_norm": 13.448685646057129, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 0.9057, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.9153318077803204, |
|
"grad_norm": 18.768592834472656, |
|
"learning_rate": 5.9e-06, |
|
"loss": 0.9076, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9916094584286804, |
|
"grad_norm": 12.053692817687988, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.7214, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.0671243325705568, |
|
"grad_norm": 13.019783020019531, |
|
"learning_rate": 6.9e-06, |
|
"loss": 0.7012, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.1434019832189168, |
|
"grad_norm": 16.344667434692383, |
|
"learning_rate": 7.4e-06, |
|
"loss": 0.7199, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.2196796338672768, |
|
"grad_norm": 13.561891555786133, |
|
"learning_rate": 7.9e-06, |
|
"loss": 0.6532, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2196796338672768, |
|
"eval_loss": 0.7690292000770569, |
|
"eval_runtime": 283.1267, |
|
"eval_samples_per_second": 1.611, |
|
"eval_steps_per_second": 0.805, |
|
"eval_wer": 0.5467013478363679, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2959572845156369, |
|
"grad_norm": 15.391003608703613, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.616, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.372234935163997, |
|
"grad_norm": 14.098180770874023, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 0.6977, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.4485125858123569, |
|
"grad_norm": 12.179559707641602, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.6716, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.524790236460717, |
|
"grad_norm": 18.046316146850586, |
|
"learning_rate": 9.9e-06, |
|
"loss": 0.6829, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.6010678871090769, |
|
"grad_norm": 12.71342658996582, |
|
"learning_rate": 9.955555555555556e-06, |
|
"loss": 0.7193, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.677345537757437, |
|
"grad_norm": 11.328106880187988, |
|
"learning_rate": 9.9e-06, |
|
"loss": 0.6129, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.7536231884057971, |
|
"grad_norm": 11.869524002075195, |
|
"learning_rate": 9.844444444444446e-06, |
|
"loss": 0.6352, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.8299008390541571, |
|
"grad_norm": 12.608686447143555, |
|
"learning_rate": 9.78888888888889e-06, |
|
"loss": 0.6347, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8299008390541571, |
|
"eval_loss": 0.7060141563415527, |
|
"eval_runtime": 285.6618, |
|
"eval_samples_per_second": 1.596, |
|
"eval_steps_per_second": 0.798, |
|
"eval_wer": 0.5128872073776306, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9061784897025171, |
|
"grad_norm": 14.89091968536377, |
|
"learning_rate": 9.733333333333334e-06, |
|
"loss": 0.6794, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.9824561403508771, |
|
"grad_norm": 17.0143985748291, |
|
"learning_rate": 9.677777777777778e-06, |
|
"loss": 0.6517, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.0579710144927534, |
|
"grad_norm": 11.61669921875, |
|
"learning_rate": 9.622222222222222e-06, |
|
"loss": 0.4539, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.1342486651411137, |
|
"grad_norm": 6.573554515838623, |
|
"learning_rate": 9.566666666666668e-06, |
|
"loss": 0.3674, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.2105263157894735, |
|
"grad_norm": 11.21182918548584, |
|
"learning_rate": 9.511111111111112e-06, |
|
"loss": 0.3828, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.2868039664378337, |
|
"grad_norm": 14.237340927124023, |
|
"learning_rate": 9.455555555555557e-06, |
|
"loss": 0.3999, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.363081617086194, |
|
"grad_norm": 9.172303199768066, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.4579, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.4393592677345537, |
|
"grad_norm": 10.187737464904785, |
|
"learning_rate": 9.344444444444446e-06, |
|
"loss": 0.4066, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.4393592677345537, |
|
"eval_loss": 0.6969724297523499, |
|
"eval_runtime": 289.906, |
|
"eval_samples_per_second": 1.573, |
|
"eval_steps_per_second": 0.786, |
|
"eval_wer": 0.526129108536297, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.515636918382914, |
|
"grad_norm": 11.924391746520996, |
|
"learning_rate": 9.28888888888889e-06, |
|
"loss": 0.382, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.5919145690312737, |
|
"grad_norm": 10.492657661437988, |
|
"learning_rate": 9.233333333333334e-06, |
|
"loss": 0.3796, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.668192219679634, |
|
"grad_norm": 9.099035263061523, |
|
"learning_rate": 9.17777777777778e-06, |
|
"loss": 0.5154, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.744469870327994, |
|
"grad_norm": 11.572857856750488, |
|
"learning_rate": 9.122222222222223e-06, |
|
"loss": 0.4487, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.820747520976354, |
|
"grad_norm": 13.59435749053955, |
|
"learning_rate": 9.066666666666667e-06, |
|
"loss": 0.4478, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.8970251716247137, |
|
"grad_norm": 11.392141342163086, |
|
"learning_rate": 9.011111111111111e-06, |
|
"loss": 0.352, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.973302822273074, |
|
"grad_norm": 8.065723419189453, |
|
"learning_rate": 8.955555555555555e-06, |
|
"loss": 0.3986, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 3.0488176964149503, |
|
"grad_norm": 7.743978977203369, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 0.2542, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0488176964149503, |
|
"eval_loss": 0.7139758467674255, |
|
"eval_runtime": 280.6617, |
|
"eval_samples_per_second": 1.625, |
|
"eval_steps_per_second": 0.812, |
|
"eval_wer": 0.5034287065500118, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.1250953470633105, |
|
"grad_norm": 6.686219215393066, |
|
"learning_rate": 8.844444444444445e-06, |
|
"loss": 0.1859, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 3.2013729977116703, |
|
"grad_norm": 8.531457901000977, |
|
"learning_rate": 8.788888888888891e-06, |
|
"loss": 0.2228, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 3.2776506483600305, |
|
"grad_norm": 8.079030990600586, |
|
"learning_rate": 8.733333333333333e-06, |
|
"loss": 0.233, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 3.3539282990083907, |
|
"grad_norm": 7.210234642028809, |
|
"learning_rate": 8.677777777777779e-06, |
|
"loss": 0.278, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.4302059496567505, |
|
"grad_norm": 9.321666717529297, |
|
"learning_rate": 8.622222222222223e-06, |
|
"loss": 0.2022, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 3.5064836003051107, |
|
"grad_norm": 8.422845840454102, |
|
"learning_rate": 8.566666666666667e-06, |
|
"loss": 0.2462, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.5827612509534705, |
|
"grad_norm": 7.176828384399414, |
|
"learning_rate": 8.511111111111113e-06, |
|
"loss": 0.2568, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 3.6590389016018308, |
|
"grad_norm": 7.077821731567383, |
|
"learning_rate": 8.455555555555555e-06, |
|
"loss": 0.252, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.6590389016018308, |
|
"eval_loss": 0.7221043705940247, |
|
"eval_runtime": 282.4738, |
|
"eval_samples_per_second": 1.614, |
|
"eval_steps_per_second": 0.807, |
|
"eval_wer": 0.4833293922913218, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.7353165522501905, |
|
"grad_norm": 8.898653984069824, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.2351, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 3.8115942028985508, |
|
"grad_norm": 8.900397300720215, |
|
"learning_rate": 8.344444444444445e-06, |
|
"loss": 0.2656, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.887871853546911, |
|
"grad_norm": 6.858781814575195, |
|
"learning_rate": 8.288888888888889e-06, |
|
"loss": 0.2253, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 3.964149504195271, |
|
"grad_norm": 5.616495609283447, |
|
"learning_rate": 8.233333333333335e-06, |
|
"loss": 0.2518, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.0396643783371475, |
|
"grad_norm": 2.58622407913208, |
|
"learning_rate": 8.177777777777779e-06, |
|
"loss": 0.1329, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 4.115942028985507, |
|
"grad_norm": 4.7402544021606445, |
|
"learning_rate": 8.122222222222223e-06, |
|
"loss": 0.0939, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 4.192219679633867, |
|
"grad_norm": 7.061638355255127, |
|
"learning_rate": 8.066666666666667e-06, |
|
"loss": 0.1122, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 4.268497330282227, |
|
"grad_norm": 3.740872383117676, |
|
"learning_rate": 8.011111111111113e-06, |
|
"loss": 0.137, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.268497330282227, |
|
"eval_loss": 0.757293701171875, |
|
"eval_runtime": 278.9207, |
|
"eval_samples_per_second": 1.635, |
|
"eval_steps_per_second": 0.817, |
|
"eval_wer": 0.4878221801844408, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.344774980930588, |
|
"grad_norm": 4.499791622161865, |
|
"learning_rate": 7.955555555555557e-06, |
|
"loss": 0.0954, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 4.421052631578947, |
|
"grad_norm": 4.751846790313721, |
|
"learning_rate": 7.9e-06, |
|
"loss": 0.1071, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 4.497330282227307, |
|
"grad_norm": 9.211798667907715, |
|
"learning_rate": 7.844444444444446e-06, |
|
"loss": 0.102, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 4.573607932875667, |
|
"grad_norm": 6.26869535446167, |
|
"learning_rate": 7.788888888888889e-06, |
|
"loss": 0.0968, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.649885583524028, |
|
"grad_norm": 6.295158386230469, |
|
"learning_rate": 7.733333333333334e-06, |
|
"loss": 0.1196, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 4.726163234172388, |
|
"grad_norm": 9.435331344604492, |
|
"learning_rate": 7.677777777777778e-06, |
|
"loss": 0.1279, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 4.802440884820747, |
|
"grad_norm": 3.417738914489746, |
|
"learning_rate": 7.622222222222223e-06, |
|
"loss": 0.1082, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 4.878718535469107, |
|
"grad_norm": 7.170241832733154, |
|
"learning_rate": 7.566666666666667e-06, |
|
"loss": 0.114, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.878718535469107, |
|
"eval_loss": 0.793424129486084, |
|
"eval_runtime": 277.9238, |
|
"eval_samples_per_second": 1.641, |
|
"eval_steps_per_second": 0.82, |
|
"eval_wer": 0.4812012296051076, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.878718535469107, |
|
"step": 1600, |
|
"total_flos": 1.305149555736576e+19, |
|
"train_loss": 0.5828571186959743, |
|
"train_runtime": 5664.3172, |
|
"train_samples_per_second": 7.062, |
|
"train_steps_per_second": 0.883 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 16, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 4 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.305149555736576e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|