|
{ |
|
"best_metric": 0.20998027920722961, |
|
"best_model_checkpoint": "/scratch/skscla001/results/mms-zeroshot-300m-genbed-f-model/checkpoint-5600", |
|
"epoch": 16.986301369863014, |
|
"eval_steps": 200, |
|
"global_step": 6200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.547945205479452, |
|
"eval_loss": 2.3236358165740967, |
|
"eval_runtime": 58.3404, |
|
"eval_samples_per_second": 16.627, |
|
"eval_steps_per_second": 2.091, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.095890410958904, |
|
"eval_loss": 0.3330934941768646, |
|
"eval_runtime": 57.5798, |
|
"eval_samples_per_second": 16.846, |
|
"eval_steps_per_second": 2.119, |
|
"eval_wer": 0.5504350628424106, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.36986301369863, |
|
"grad_norm": 0.44414466619491577, |
|
"learning_rate": 0.00028902304147465435, |
|
"loss": 2.6731, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.643835616438356, |
|
"eval_loss": 0.29691705107688904, |
|
"eval_runtime": 57.9342, |
|
"eval_samples_per_second": 16.743, |
|
"eval_steps_per_second": 2.106, |
|
"eval_wer": 0.5189601460951767, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.191780821917808, |
|
"eval_loss": 0.2805863320827484, |
|
"eval_runtime": 57.8326, |
|
"eval_samples_per_second": 16.773, |
|
"eval_steps_per_second": 2.11, |
|
"eval_wer": 0.5121925018799012, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.73972602739726, |
|
"grad_norm": 0.2989409565925598, |
|
"learning_rate": 0.00027519815668202764, |
|
"loss": 0.4193, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.73972602739726, |
|
"eval_loss": 0.2701254189014435, |
|
"eval_runtime": 57.9519, |
|
"eval_samples_per_second": 16.738, |
|
"eval_steps_per_second": 2.105, |
|
"eval_wer": 0.4741647867654958, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.287671232876712, |
|
"eval_loss": 0.27029839158058167, |
|
"eval_runtime": 57.8498, |
|
"eval_samples_per_second": 16.768, |
|
"eval_steps_per_second": 2.109, |
|
"eval_wer": 0.47695778279084755, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.8356164383561646, |
|
"eval_loss": 0.2574402093887329, |
|
"eval_runtime": 57.9028, |
|
"eval_samples_per_second": 16.752, |
|
"eval_steps_per_second": 2.107, |
|
"eval_wer": 0.47577613062627566, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.109589041095891, |
|
"grad_norm": 0.5375602841377258, |
|
"learning_rate": 0.00026137327188940093, |
|
"loss": 0.367, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.383561643835616, |
|
"eval_loss": 0.24872656166553497, |
|
"eval_runtime": 57.6765, |
|
"eval_samples_per_second": 16.818, |
|
"eval_steps_per_second": 2.115, |
|
"eval_wer": 0.4547212375120851, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.931506849315069, |
|
"eval_loss": 0.247171550989151, |
|
"eval_runtime": 58.1732, |
|
"eval_samples_per_second": 16.674, |
|
"eval_steps_per_second": 2.097, |
|
"eval_wer": 0.43366634439789453, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.47945205479452, |
|
"grad_norm": 0.536637544631958, |
|
"learning_rate": 0.00024754838709677417, |
|
"loss": 0.3377, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.47945205479452, |
|
"eval_loss": 0.24240365624427795, |
|
"eval_runtime": 58.1149, |
|
"eval_samples_per_second": 16.691, |
|
"eval_steps_per_second": 2.099, |
|
"eval_wer": 0.44666451820818565, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.027397260273973, |
|
"eval_loss": 0.23721589148044586, |
|
"eval_runtime": 57.9941, |
|
"eval_samples_per_second": 16.726, |
|
"eval_steps_per_second": 2.104, |
|
"eval_wer": 0.42743581480287896, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.575342465753424, |
|
"eval_loss": 0.23664695024490356, |
|
"eval_runtime": 58.3196, |
|
"eval_samples_per_second": 16.632, |
|
"eval_steps_per_second": 2.092, |
|
"eval_wer": 0.4224943602964873, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.8493150684931505, |
|
"grad_norm": 0.45388591289520264, |
|
"learning_rate": 0.00023372350230414744, |
|
"loss": 0.3282, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.123287671232877, |
|
"eval_loss": 0.23390649259090424, |
|
"eval_runtime": 58.129, |
|
"eval_samples_per_second": 16.687, |
|
"eval_steps_per_second": 2.099, |
|
"eval_wer": 0.4103555698786121, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.671232876712329, |
|
"eval_loss": 0.23517899215221405, |
|
"eval_runtime": 57.9143, |
|
"eval_samples_per_second": 16.749, |
|
"eval_steps_per_second": 2.107, |
|
"eval_wer": 0.4192716725749275, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 8.219178082191782, |
|
"grad_norm": 0.6077154278755188, |
|
"learning_rate": 0.0002198986175115207, |
|
"loss": 0.3018, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.219178082191782, |
|
"eval_loss": 0.2249327301979065, |
|
"eval_runtime": 57.7536, |
|
"eval_samples_per_second": 16.795, |
|
"eval_steps_per_second": 2.112, |
|
"eval_wer": 0.4097110323343001, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.767123287671232, |
|
"eval_loss": 0.22541025280952454, |
|
"eval_runtime": 58.012, |
|
"eval_samples_per_second": 16.721, |
|
"eval_steps_per_second": 2.103, |
|
"eval_wer": 0.40648834461274036, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 9.315068493150685, |
|
"eval_loss": 0.2250933200120926, |
|
"eval_runtime": 58.2195, |
|
"eval_samples_per_second": 16.661, |
|
"eval_steps_per_second": 2.096, |
|
"eval_wer": 0.40208400472660866, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.58904109589041, |
|
"grad_norm": 0.45964235067367554, |
|
"learning_rate": 0.00020607373271889397, |
|
"loss": 0.2945, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.863013698630137, |
|
"eval_loss": 0.22482535243034363, |
|
"eval_runtime": 57.7885, |
|
"eval_samples_per_second": 16.785, |
|
"eval_steps_per_second": 2.111, |
|
"eval_wer": 0.396927704372113, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 10.41095890410959, |
|
"eval_loss": 0.22121191024780273, |
|
"eval_runtime": 58.297, |
|
"eval_samples_per_second": 16.639, |
|
"eval_steps_per_second": 2.093, |
|
"eval_wer": 0.4001503920936728, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 10.95890410958904, |
|
"grad_norm": 1.2428817749023438, |
|
"learning_rate": 0.0001922488479262673, |
|
"loss": 0.2843, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 10.95890410958904, |
|
"eval_loss": 0.21997873485088348, |
|
"eval_runtime": 57.8696, |
|
"eval_samples_per_second": 16.762, |
|
"eval_steps_per_second": 2.108, |
|
"eval_wer": 0.39198624986572134, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 11.506849315068493, |
|
"eval_loss": 0.21829502284526825, |
|
"eval_runtime": 57.8135, |
|
"eval_samples_per_second": 16.778, |
|
"eval_steps_per_second": 2.11, |
|
"eval_wer": 0.3853260285744978, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 12.054794520547945, |
|
"eval_loss": 0.21739539504051208, |
|
"eval_runtime": 57.004, |
|
"eval_samples_per_second": 17.016, |
|
"eval_steps_per_second": 2.14, |
|
"eval_wer": 0.3889784079922656, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 12.32876712328767, |
|
"grad_norm": 0.44710573554039, |
|
"learning_rate": 0.00017842396313364056, |
|
"loss": 0.2755, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.602739726027398, |
|
"eval_loss": 0.21631112694740295, |
|
"eval_runtime": 57.2582, |
|
"eval_samples_per_second": 16.941, |
|
"eval_steps_per_second": 2.131, |
|
"eval_wer": 0.3955312063594371, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 13.150684931506849, |
|
"eval_loss": 0.21974815428256989, |
|
"eval_runtime": 57.1085, |
|
"eval_samples_per_second": 16.985, |
|
"eval_steps_per_second": 2.136, |
|
"eval_wer": 0.3894080996884735, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 13.698630136986301, |
|
"grad_norm": 0.3474729061126709, |
|
"learning_rate": 0.00016459907834101383, |
|
"loss": 0.2699, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.698630136986301, |
|
"eval_loss": 0.21634985506534576, |
|
"eval_runtime": 57.267, |
|
"eval_samples_per_second": 16.938, |
|
"eval_steps_per_second": 2.13, |
|
"eval_wer": 0.38994521430873347, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 14.246575342465754, |
|
"eval_loss": 0.21288762986660004, |
|
"eval_runtime": 56.9225, |
|
"eval_samples_per_second": 17.041, |
|
"eval_steps_per_second": 2.143, |
|
"eval_wer": 0.37694704049844235, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 14.794520547945206, |
|
"eval_loss": 0.2114371657371521, |
|
"eval_runtime": 57.0542, |
|
"eval_samples_per_second": 17.001, |
|
"eval_steps_per_second": 2.138, |
|
"eval_wer": 0.37587281125792243, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 15.068493150684931, |
|
"grad_norm": 0.5010984539985657, |
|
"learning_rate": 0.0001507741935483871, |
|
"loss": 0.2568, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 15.342465753424657, |
|
"eval_loss": 0.20998027920722961, |
|
"eval_runtime": 56.9906, |
|
"eval_samples_per_second": 17.02, |
|
"eval_steps_per_second": 2.141, |
|
"eval_wer": 0.3721130089161027, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 15.89041095890411, |
|
"eval_loss": 0.2139931619167328, |
|
"eval_runtime": 56.9487, |
|
"eval_samples_per_second": 17.033, |
|
"eval_steps_per_second": 2.142, |
|
"eval_wer": 0.36695670856160706, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 16.438356164383563, |
|
"grad_norm": 0.3882729411125183, |
|
"learning_rate": 0.00013694930875576036, |
|
"loss": 0.2521, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.438356164383563, |
|
"eval_loss": 0.21490569412708282, |
|
"eval_runtime": 57.0453, |
|
"eval_samples_per_second": 17.004, |
|
"eval_steps_per_second": 2.139, |
|
"eval_wer": 0.37426146739714256, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.986301369863014, |
|
"eval_loss": 0.21307234466075897, |
|
"eval_runtime": 57.0514, |
|
"eval_samples_per_second": 17.002, |
|
"eval_steps_per_second": 2.138, |
|
"eval_wer": 0.3720055859920507, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 16.986301369863014, |
|
"step": 6200, |
|
"total_flos": 9.649821343406506e+18, |
|
"train_loss": 0.4968759044524162, |
|
"train_runtime": 6151.3654, |
|
"train_samples_per_second": 14.226, |
|
"train_steps_per_second": 1.78 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10950, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.649821343406506e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|