|
{ |
|
"best_metric": 0.25722527503967285, |
|
"best_model_checkpoint": "whisper-medium-v3-ff4/checkpoint-20000", |
|
"epoch": 3.3243830712185143, |
|
"eval_steps": 5000, |
|
"global_step": 26000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06393044367727913, |
|
"grad_norm": 8.506481170654297, |
|
"learning_rate": 1.965626204238921e-05, |
|
"loss": 0.9546, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12786088735455825, |
|
"grad_norm": 9.10377025604248, |
|
"learning_rate": 1.9270905587668595e-05, |
|
"loss": 0.5883, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19179133103183735, |
|
"grad_norm": 6.146899223327637, |
|
"learning_rate": 1.888554913294798e-05, |
|
"loss": 0.5081, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2557217747091165, |
|
"grad_norm": 4.1534929275512695, |
|
"learning_rate": 1.8500192678227363e-05, |
|
"loss": 0.4631, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.3196522183863956, |
|
"grad_norm": 5.8231587409973145, |
|
"learning_rate": 1.8114836223506743e-05, |
|
"loss": 0.4313, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.3835826620636747, |
|
"grad_norm": 7.882592678070068, |
|
"learning_rate": 1.772947976878613e-05, |
|
"loss": 0.417, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4475131057409538, |
|
"grad_norm": 5.645440101623535, |
|
"learning_rate": 1.7344123314065514e-05, |
|
"loss": 0.4162, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.511443549418233, |
|
"grad_norm": 4.277677536010742, |
|
"learning_rate": 1.6958766859344894e-05, |
|
"loss": 0.378, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5753739930955121, |
|
"grad_norm": 4.594604969024658, |
|
"learning_rate": 1.6573410404624278e-05, |
|
"loss": 0.3653, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.6393044367727913, |
|
"grad_norm": 6.918728351593018, |
|
"learning_rate": 1.6188053949903662e-05, |
|
"loss": 0.3543, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6393044367727913, |
|
"eval_loss": 0.3379935026168823, |
|
"eval_runtime": 1120.3014, |
|
"eval_samples_per_second": 3.096, |
|
"eval_steps_per_second": 0.387, |
|
"eval_wer": 0.2361289200878013, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.7032348804500703, |
|
"grad_norm": 5.27606725692749, |
|
"learning_rate": 1.5802697495183046e-05, |
|
"loss": 0.34, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7671653241273494, |
|
"grad_norm": 8.176605224609375, |
|
"learning_rate": 1.541734104046243e-05, |
|
"loss": 0.3273, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.8310957678046286, |
|
"grad_norm": 4.268400192260742, |
|
"learning_rate": 1.5032755298651254e-05, |
|
"loss": 0.3305, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.8950262114819076, |
|
"grad_norm": 7.465890884399414, |
|
"learning_rate": 1.4647398843930638e-05, |
|
"loss": 0.318, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.9589566551591868, |
|
"grad_norm": 7.091923236846924, |
|
"learning_rate": 1.426204238921002e-05, |
|
"loss": 0.2965, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.022887098836466, |
|
"grad_norm": 5.541090965270996, |
|
"learning_rate": 1.3878227360308287e-05, |
|
"loss": 0.2865, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.086817542513745, |
|
"grad_norm": 5.313994407653809, |
|
"learning_rate": 1.349287090558767e-05, |
|
"loss": 0.2109, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.1507479861910241, |
|
"grad_norm": 4.2993621826171875, |
|
"learning_rate": 1.3107514450867053e-05, |
|
"loss": 0.2191, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.2146784298683033, |
|
"grad_norm": 5.328224182128906, |
|
"learning_rate": 1.2722157996146436e-05, |
|
"loss": 0.2196, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.2786088735455823, |
|
"grad_norm": 5.929138660430908, |
|
"learning_rate": 1.2336801541425821e-05, |
|
"loss": 0.2259, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.2786088735455823, |
|
"eval_loss": 0.2893017828464508, |
|
"eval_runtime": 1112.5698, |
|
"eval_samples_per_second": 3.117, |
|
"eval_steps_per_second": 0.39, |
|
"eval_wer": 0.2021819920457262, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.3425393172228615, |
|
"grad_norm": 4.919782638549805, |
|
"learning_rate": 1.1951445086705203e-05, |
|
"loss": 0.2224, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.4064697609001406, |
|
"grad_norm": 5.959251403808594, |
|
"learning_rate": 1.1566088631984587e-05, |
|
"loss": 0.2125, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.4704002045774198, |
|
"grad_norm": 4.974765777587891, |
|
"learning_rate": 1.118073217726397e-05, |
|
"loss": 0.2074, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.534330648254699, |
|
"grad_norm": 5.756904125213623, |
|
"learning_rate": 1.0795375722543353e-05, |
|
"loss": 0.2089, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.598261091931978, |
|
"grad_norm": 3.7231180667877197, |
|
"learning_rate": 1.0410019267822737e-05, |
|
"loss": 0.215, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.6621915356092571, |
|
"grad_norm": 4.505956172943115, |
|
"learning_rate": 1.0024662813102119e-05, |
|
"loss": 0.204, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.7261219792865363, |
|
"grad_norm": 2.797830820083618, |
|
"learning_rate": 9.639306358381504e-06, |
|
"loss": 0.1983, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.7900524229638153, |
|
"grad_norm": 6.62880802154541, |
|
"learning_rate": 9.254720616570328e-06, |
|
"loss": 0.2004, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.8539828666410945, |
|
"grad_norm": 4.8896284103393555, |
|
"learning_rate": 8.869364161849712e-06, |
|
"loss": 0.2023, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.9179133103183736, |
|
"grad_norm": 4.615833759307861, |
|
"learning_rate": 8.484007707129094e-06, |
|
"loss": 0.1938, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.9179133103183736, |
|
"eval_loss": 0.2596728801727295, |
|
"eval_runtime": 1104.7223, |
|
"eval_samples_per_second": 3.139, |
|
"eval_steps_per_second": 0.393, |
|
"eval_wer": 0.1790146263012627, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.9818437539956526, |
|
"grad_norm": 2.99739408493042, |
|
"learning_rate": 8.098651252408478e-06, |
|
"loss": 0.1871, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.045774197672932, |
|
"grad_norm": 1.4894932508468628, |
|
"learning_rate": 7.713294797687862e-06, |
|
"loss": 0.1419, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.109704641350211, |
|
"grad_norm": 3.1801295280456543, |
|
"learning_rate": 7.327938342967246e-06, |
|
"loss": 0.1193, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.17363508502749, |
|
"grad_norm": 3.464812994003296, |
|
"learning_rate": 6.942581888246629e-06, |
|
"loss": 0.1196, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.2375655287047693, |
|
"grad_norm": 2.4718642234802246, |
|
"learning_rate": 6.5572254335260125e-06, |
|
"loss": 0.1173, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.3014959723820483, |
|
"grad_norm": 4.073084354400635, |
|
"learning_rate": 6.172639691714837e-06, |
|
"loss": 0.1179, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.3654264160593277, |
|
"grad_norm": 2.377967119216919, |
|
"learning_rate": 5.78728323699422e-06, |
|
"loss": 0.1158, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.4293568597366066, |
|
"grad_norm": 6.478204250335693, |
|
"learning_rate": 5.401926782273603e-06, |
|
"loss": 0.1229, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.4932873034138856, |
|
"grad_norm": 1.8411247730255127, |
|
"learning_rate": 5.016570327552986e-06, |
|
"loss": 0.1144, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.5572177470911646, |
|
"grad_norm": 3.3542532920837402, |
|
"learning_rate": 4.631984585741811e-06, |
|
"loss": 0.1119, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.5572177470911646, |
|
"eval_loss": 0.25722527503967285, |
|
"eval_runtime": 1107.3303, |
|
"eval_samples_per_second": 3.132, |
|
"eval_steps_per_second": 0.392, |
|
"eval_wer": 0.16938691239432335, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.621148190768444, |
|
"grad_norm": 4.66868257522583, |
|
"learning_rate": 4.246628131021195e-06, |
|
"loss": 0.1126, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.685078634445723, |
|
"grad_norm": 5.47554874420166, |
|
"learning_rate": 3.861271676300578e-06, |
|
"loss": 0.1171, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.7490090781230023, |
|
"grad_norm": 3.905674695968628, |
|
"learning_rate": 3.475915221579962e-06, |
|
"loss": 0.1158, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.8129395218002813, |
|
"grad_norm": 3.0056424140930176, |
|
"learning_rate": 3.0913294797687864e-06, |
|
"loss": 0.1133, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.8768699654775602, |
|
"grad_norm": 1.467344880104065, |
|
"learning_rate": 2.70597302504817e-06, |
|
"loss": 0.1085, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.9408004091548396, |
|
"grad_norm": 1.9786620140075684, |
|
"learning_rate": 2.3213872832369944e-06, |
|
"loss": 0.1084, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.0047308528321186, |
|
"grad_norm": 3.245360851287842, |
|
"learning_rate": 1.9360308285163777e-06, |
|
"loss": 0.102, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.0686612965093976, |
|
"grad_norm": 0.7349840998649597, |
|
"learning_rate": 1.5506743737957611e-06, |
|
"loss": 0.063, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.132591740186677, |
|
"grad_norm": 1.5207897424697876, |
|
"learning_rate": 1.1653179190751445e-06, |
|
"loss": 0.0616, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.196522183863956, |
|
"grad_norm": 3.616459846496582, |
|
"learning_rate": 7.799614643545279e-07, |
|
"loss": 0.0632, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.196522183863956, |
|
"eval_loss": 0.2614915072917938, |
|
"eval_runtime": 1121.1739, |
|
"eval_samples_per_second": 3.093, |
|
"eval_steps_per_second": 0.387, |
|
"eval_wer": 0.16601829917631974, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.2604526275412353, |
|
"grad_norm": 1.1991757154464722, |
|
"learning_rate": 3.946050096339114e-07, |
|
"loss": 0.0598, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.3243830712185143, |
|
"grad_norm": 3.6829333305358887, |
|
"learning_rate": 9.248554913294799e-09, |
|
"loss": 0.0593, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.3243830712185143, |
|
"step": 26000, |
|
"total_flos": 6.002316599795712e+19, |
|
"train_loss": 0.2320793916262113, |
|
"train_runtime": 50624.8997, |
|
"train_samples_per_second": 4.109, |
|
"train_steps_per_second": 0.514 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 26000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 1 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.002316599795712e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|