{ "best_metric": 0.20377855002880096, "best_model_checkpoint": "/scratch/skscla001/results/mms-zeroshot-300m-bembaspeech-model/checkpoint-4600", "epoch": 3.6542515811665495, "eval_steps": 200, "global_step": 5200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14054813773717498, "eval_loss": 2.2802751064300537, "eval_runtime": 82.1473, "eval_samples_per_second": 18.162, "eval_steps_per_second": 2.276, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.28109627547434995, "eval_loss": 0.2944619655609131, "eval_runtime": 82.5798, "eval_samples_per_second": 18.067, "eval_steps_per_second": 2.264, "eval_wer": 0.5100787265334371, "step": 400 }, { "epoch": 0.35137034434293746, "grad_norm": 0.8652539849281311, "learning_rate": 0.00029720356891289033, "loss": 2.741, "step": 500 }, { "epoch": 0.42164441321152496, "eval_loss": 0.2641391158103943, "eval_runtime": 81.2987, "eval_samples_per_second": 18.352, "eval_steps_per_second": 2.3, "eval_wer": 0.4834328229085561, "step": 600 }, { "epoch": 0.5621925509486999, "eval_loss": 0.26114964485168457, "eval_runtime": 81.2645, "eval_samples_per_second": 18.36, "eval_steps_per_second": 2.301, "eval_wer": 0.47434899212734666, "step": 800 }, { "epoch": 0.7027406886858749, "grad_norm": 1.3384147882461548, "learning_rate": 0.00029368161540267664, "loss": 0.5962, "step": 1000 }, { "epoch": 0.7027406886858749, "eval_loss": 0.2608044445514679, "eval_runtime": 81.9453, "eval_samples_per_second": 18.207, "eval_steps_per_second": 2.282, "eval_wer": 0.4830867722121291, "step": 1000 }, { "epoch": 0.8432888264230499, "eval_loss": 0.24766217172145844, "eval_runtime": 81.5485, "eval_samples_per_second": 18.296, "eval_steps_per_second": 2.293, "eval_wer": 0.4562678432390345, "step": 1200 }, { "epoch": 0.9838369641602249, "eval_loss": 0.24071051180362701, "eval_runtime": 81.0412, "eval_samples_per_second": 18.41, "eval_steps_per_second": 2.307, "eval_wer": 0.4567004066095683, "step": 1400 }, { "epoch": 1.0541110330288124, "grad_norm": 0.5781381130218506, "learning_rate": 0.000290159661892463, "loss": 0.536, "step": 1500 }, { "epoch": 1.1243851018973998, "eval_loss": 0.23427943885326385, "eval_runtime": 82.3276, "eval_samples_per_second": 18.123, "eval_steps_per_second": 2.271, "eval_wer": 0.44121463794445887, "step": 1600 }, { "epoch": 1.264933239634575, "eval_loss": 0.23070427775382996, "eval_runtime": 82.28, "eval_samples_per_second": 18.133, "eval_steps_per_second": 2.273, "eval_wer": 0.44225279003373996, "step": 1800 }, { "epoch": 1.4054813773717498, "grad_norm": 0.5301165580749512, "learning_rate": 0.0002866377083822493, "loss": 0.5221, "step": 2000 }, { "epoch": 1.4054813773717498, "eval_loss": 0.22523300349712372, "eval_runtime": 81.337, "eval_samples_per_second": 18.343, "eval_steps_per_second": 2.299, "eval_wer": 0.4348127000605589, "step": 2000 }, { "epoch": 1.5460295151089247, "eval_loss": 0.22277304530143738, "eval_runtime": 81.5619, "eval_samples_per_second": 18.293, "eval_steps_per_second": 2.293, "eval_wer": 0.4325633705337832, "step": 2200 }, { "epoch": 1.6865776528460998, "eval_loss": 0.21623817086219788, "eval_runtime": 81.5646, "eval_samples_per_second": 18.292, "eval_steps_per_second": 2.293, "eval_wer": 0.4252963059088156, "step": 2400 }, { "epoch": 1.7568517217146873, "grad_norm": 0.5294317007064819, "learning_rate": 0.00028311575487203566, "loss": 0.5027, "step": 2500 }, { "epoch": 1.8271257905832747, "eval_loss": 0.22000491619110107, "eval_runtime": 81.31, "eval_samples_per_second": 18.35, "eval_steps_per_second": 2.3, "eval_wer": 0.4188078553508089, "step": 2600 }, { "epoch": 1.9676739283204498, "eval_loss": 0.2131248265504837, "eval_runtime": 81.3644, "eval_samples_per_second": 18.337, "eval_steps_per_second": 2.298, "eval_wer": 0.4142226836231508, "step": 2800 }, { "epoch": 2.1082220660576247, "grad_norm": 0.3984699547290802, "learning_rate": 0.000279593801361822, "loss": 0.4818, "step": 3000 }, { "epoch": 2.1082220660576247, "eval_loss": 0.22807464003562927, "eval_runtime": 81.2983, "eval_samples_per_second": 18.352, "eval_steps_per_second": 2.3, "eval_wer": 0.42806471148023184, "step": 3000 }, { "epoch": 2.2487702037947996, "eval_loss": 0.21783553063869476, "eval_runtime": 81.2747, "eval_samples_per_second": 18.357, "eval_steps_per_second": 2.301, "eval_wer": 0.41465524699368456, "step": 3200 }, { "epoch": 2.3893183415319745, "eval_loss": 0.2122805118560791, "eval_runtime": 81.1791, "eval_samples_per_second": 18.379, "eval_steps_per_second": 2.304, "eval_wer": 0.41552037373475215, "step": 3400 }, { "epoch": 2.459592410400562, "grad_norm": 0.6205651760101318, "learning_rate": 0.00027607184785160833, "loss": 0.4619, "step": 3500 }, { "epoch": 2.52986647926915, "eval_loss": 0.21417662501335144, "eval_runtime": 81.5176, "eval_samples_per_second": 18.303, "eval_steps_per_second": 2.294, "eval_wer": 0.40790725841335757, "step": 3600 }, { "epoch": 2.6704146170063248, "eval_loss": 0.21559232473373413, "eval_runtime": 81.1016, "eval_samples_per_second": 18.397, "eval_steps_per_second": 2.306, "eval_wer": 0.40107275715892376, "step": 3800 }, { "epoch": 2.8109627547434997, "grad_norm": 0.5996519923210144, "learning_rate": 0.0002725498943413947, "loss": 0.464, "step": 4000 }, { "epoch": 2.8109627547434997, "eval_loss": 0.2071918100118637, "eval_runtime": 82.6744, "eval_samples_per_second": 18.047, "eval_steps_per_second": 2.262, "eval_wer": 0.40089973181071026, "step": 4000 }, { "epoch": 2.9515108924806746, "eval_loss": 0.21284320950508118, "eval_runtime": 81.7324, "eval_samples_per_second": 18.255, "eval_steps_per_second": 2.288, "eval_wer": 0.401332295181244, "step": 4200 }, { "epoch": 3.0920590302178494, "eval_loss": 0.20559127628803253, "eval_runtime": 81.3555, "eval_samples_per_second": 18.339, "eval_steps_per_second": 2.299, "eval_wer": 0.3982178389134008, "step": 4400 }, { "epoch": 3.162333099086437, "grad_norm": 0.6319628357887268, "learning_rate": 0.000269027940831181, "loss": 0.4464, "step": 4500 }, { "epoch": 3.232607167955025, "eval_loss": 0.20377855002880096, "eval_runtime": 82.1978, "eval_samples_per_second": 18.151, "eval_steps_per_second": 2.275, "eval_wer": 0.40072670646249675, "step": 4600 }, { "epoch": 3.3731553056921997, "eval_loss": 0.20886844396591187, "eval_runtime": 81.607, "eval_samples_per_second": 18.283, "eval_steps_per_second": 2.291, "eval_wer": 0.39873691495804137, "step": 4800 }, { "epoch": 3.5137034434293746, "grad_norm": 0.6382594704627991, "learning_rate": 0.0002655059873209673, "loss": 0.4418, "step": 5000 }, { "epoch": 3.5137034434293746, "eval_loss": 0.20426709949970245, "eval_runtime": 81.6377, "eval_samples_per_second": 18.276, "eval_steps_per_second": 2.291, "eval_wer": 0.40089973181071026, "step": 5000 }, { "epoch": 3.6542515811665495, "eval_loss": 0.2049737125635147, "eval_runtime": 81.6534, "eval_samples_per_second": 18.272, "eval_steps_per_second": 2.29, "eval_wer": 0.3964010727571589, "step": 5200 }, { "epoch": 3.6542515811665495, "step": 5200, "total_flos": 8.128487028902814e+18, "train_loss": 0.7083046766427847, "train_runtime": 5849.6566, "train_samples_per_second": 58.347, "train_steps_per_second": 7.298 } ], "logging_steps": 500, "max_steps": 42690, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.128487028902814e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }