|
{ |
|
"best_metric": 0.20377855002880096, |
|
"best_model_checkpoint": "/scratch/skscla001/results/mms-zeroshot-300m-bembaspeech-model/checkpoint-4600", |
|
"epoch": 3.6542515811665495, |
|
"eval_steps": 200, |
|
"global_step": 5200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14054813773717498, |
|
"eval_loss": 2.2802751064300537, |
|
"eval_runtime": 82.1473, |
|
"eval_samples_per_second": 18.162, |
|
"eval_steps_per_second": 2.276, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28109627547434995, |
|
"eval_loss": 0.2944619655609131, |
|
"eval_runtime": 82.5798, |
|
"eval_samples_per_second": 18.067, |
|
"eval_steps_per_second": 2.264, |
|
"eval_wer": 0.5100787265334371, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.35137034434293746, |
|
"grad_norm": 0.8652539849281311, |
|
"learning_rate": 0.00029720356891289033, |
|
"loss": 2.741, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.42164441321152496, |
|
"eval_loss": 0.2641391158103943, |
|
"eval_runtime": 81.2987, |
|
"eval_samples_per_second": 18.352, |
|
"eval_steps_per_second": 2.3, |
|
"eval_wer": 0.4834328229085561, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5621925509486999, |
|
"eval_loss": 0.26114964485168457, |
|
"eval_runtime": 81.2645, |
|
"eval_samples_per_second": 18.36, |
|
"eval_steps_per_second": 2.301, |
|
"eval_wer": 0.47434899212734666, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7027406886858749, |
|
"grad_norm": 1.3384147882461548, |
|
"learning_rate": 0.00029368161540267664, |
|
"loss": 0.5962, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7027406886858749, |
|
"eval_loss": 0.2608044445514679, |
|
"eval_runtime": 81.9453, |
|
"eval_samples_per_second": 18.207, |
|
"eval_steps_per_second": 2.282, |
|
"eval_wer": 0.4830867722121291, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8432888264230499, |
|
"eval_loss": 0.24766217172145844, |
|
"eval_runtime": 81.5485, |
|
"eval_samples_per_second": 18.296, |
|
"eval_steps_per_second": 2.293, |
|
"eval_wer": 0.4562678432390345, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9838369641602249, |
|
"eval_loss": 0.24071051180362701, |
|
"eval_runtime": 81.0412, |
|
"eval_samples_per_second": 18.41, |
|
"eval_steps_per_second": 2.307, |
|
"eval_wer": 0.4567004066095683, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.0541110330288124, |
|
"grad_norm": 0.5781381130218506, |
|
"learning_rate": 0.000290159661892463, |
|
"loss": 0.536, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.1243851018973998, |
|
"eval_loss": 0.23427943885326385, |
|
"eval_runtime": 82.3276, |
|
"eval_samples_per_second": 18.123, |
|
"eval_steps_per_second": 2.271, |
|
"eval_wer": 0.44121463794445887, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.264933239634575, |
|
"eval_loss": 0.23070427775382996, |
|
"eval_runtime": 82.28, |
|
"eval_samples_per_second": 18.133, |
|
"eval_steps_per_second": 2.273, |
|
"eval_wer": 0.44225279003373996, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.4054813773717498, |
|
"grad_norm": 0.5301165580749512, |
|
"learning_rate": 0.0002866377083822493, |
|
"loss": 0.5221, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4054813773717498, |
|
"eval_loss": 0.22523300349712372, |
|
"eval_runtime": 81.337, |
|
"eval_samples_per_second": 18.343, |
|
"eval_steps_per_second": 2.299, |
|
"eval_wer": 0.4348127000605589, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.5460295151089247, |
|
"eval_loss": 0.22277304530143738, |
|
"eval_runtime": 81.5619, |
|
"eval_samples_per_second": 18.293, |
|
"eval_steps_per_second": 2.293, |
|
"eval_wer": 0.4325633705337832, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6865776528460998, |
|
"eval_loss": 0.21623817086219788, |
|
"eval_runtime": 81.5646, |
|
"eval_samples_per_second": 18.292, |
|
"eval_steps_per_second": 2.293, |
|
"eval_wer": 0.4252963059088156, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.7568517217146873, |
|
"grad_norm": 0.5294317007064819, |
|
"learning_rate": 0.00028311575487203566, |
|
"loss": 0.5027, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.8271257905832747, |
|
"eval_loss": 0.22000491619110107, |
|
"eval_runtime": 81.31, |
|
"eval_samples_per_second": 18.35, |
|
"eval_steps_per_second": 2.3, |
|
"eval_wer": 0.4188078553508089, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.9676739283204498, |
|
"eval_loss": 0.2131248265504837, |
|
"eval_runtime": 81.3644, |
|
"eval_samples_per_second": 18.337, |
|
"eval_steps_per_second": 2.298, |
|
"eval_wer": 0.4142226836231508, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.1082220660576247, |
|
"grad_norm": 0.3984699547290802, |
|
"learning_rate": 0.000279593801361822, |
|
"loss": 0.4818, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.1082220660576247, |
|
"eval_loss": 0.22807464003562927, |
|
"eval_runtime": 81.2983, |
|
"eval_samples_per_second": 18.352, |
|
"eval_steps_per_second": 2.3, |
|
"eval_wer": 0.42806471148023184, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.2487702037947996, |
|
"eval_loss": 0.21783553063869476, |
|
"eval_runtime": 81.2747, |
|
"eval_samples_per_second": 18.357, |
|
"eval_steps_per_second": 2.301, |
|
"eval_wer": 0.41465524699368456, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.3893183415319745, |
|
"eval_loss": 0.2122805118560791, |
|
"eval_runtime": 81.1791, |
|
"eval_samples_per_second": 18.379, |
|
"eval_steps_per_second": 2.304, |
|
"eval_wer": 0.41552037373475215, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.459592410400562, |
|
"grad_norm": 0.6205651760101318, |
|
"learning_rate": 0.00027607184785160833, |
|
"loss": 0.4619, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.52986647926915, |
|
"eval_loss": 0.21417662501335144, |
|
"eval_runtime": 81.5176, |
|
"eval_samples_per_second": 18.303, |
|
"eval_steps_per_second": 2.294, |
|
"eval_wer": 0.40790725841335757, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.6704146170063248, |
|
"eval_loss": 0.21559232473373413, |
|
"eval_runtime": 81.1016, |
|
"eval_samples_per_second": 18.397, |
|
"eval_steps_per_second": 2.306, |
|
"eval_wer": 0.40107275715892376, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.8109627547434997, |
|
"grad_norm": 0.5996519923210144, |
|
"learning_rate": 0.0002725498943413947, |
|
"loss": 0.464, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.8109627547434997, |
|
"eval_loss": 0.2071918100118637, |
|
"eval_runtime": 82.6744, |
|
"eval_samples_per_second": 18.047, |
|
"eval_steps_per_second": 2.262, |
|
"eval_wer": 0.40089973181071026, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.9515108924806746, |
|
"eval_loss": 0.21284320950508118, |
|
"eval_runtime": 81.7324, |
|
"eval_samples_per_second": 18.255, |
|
"eval_steps_per_second": 2.288, |
|
"eval_wer": 0.401332295181244, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.0920590302178494, |
|
"eval_loss": 0.20559127628803253, |
|
"eval_runtime": 81.3555, |
|
"eval_samples_per_second": 18.339, |
|
"eval_steps_per_second": 2.299, |
|
"eval_wer": 0.3982178389134008, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.162333099086437, |
|
"grad_norm": 0.6319628357887268, |
|
"learning_rate": 0.000269027940831181, |
|
"loss": 0.4464, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.232607167955025, |
|
"eval_loss": 0.20377855002880096, |
|
"eval_runtime": 82.1978, |
|
"eval_samples_per_second": 18.151, |
|
"eval_steps_per_second": 2.275, |
|
"eval_wer": 0.40072670646249675, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.3731553056921997, |
|
"eval_loss": 0.20886844396591187, |
|
"eval_runtime": 81.607, |
|
"eval_samples_per_second": 18.283, |
|
"eval_steps_per_second": 2.291, |
|
"eval_wer": 0.39873691495804137, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.5137034434293746, |
|
"grad_norm": 0.6382594704627991, |
|
"learning_rate": 0.0002655059873209673, |
|
"loss": 0.4418, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.5137034434293746, |
|
"eval_loss": 0.20426709949970245, |
|
"eval_runtime": 81.6377, |
|
"eval_samples_per_second": 18.276, |
|
"eval_steps_per_second": 2.291, |
|
"eval_wer": 0.40089973181071026, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.6542515811665495, |
|
"eval_loss": 0.2049737125635147, |
|
"eval_runtime": 81.6534, |
|
"eval_samples_per_second": 18.272, |
|
"eval_steps_per_second": 2.29, |
|
"eval_wer": 0.3964010727571589, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.6542515811665495, |
|
"step": 5200, |
|
"total_flos": 8.128487028902814e+18, |
|
"train_loss": 0.7083046766427847, |
|
"train_runtime": 5849.6566, |
|
"train_samples_per_second": 58.347, |
|
"train_steps_per_second": 7.298 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 42690, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.128487028902814e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|