{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.063868392403532, "eval_steps": 800, "global_step": 16800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1935405830410064, "grad_norm": NaN, "learning_rate": 0.0002979720793534166, "loss": 5.9034, "step": 800 }, { "epoch": 0.1935405830410064, "eval_loss": Infinity, "eval_runtime": 557.9284, "eval_samples_per_second": 41.837, "eval_steps_per_second": 5.23, "eval_wer": 1.0, "step": 800 }, { "epoch": 0.3870811660820128, "grad_norm": 0.0, "learning_rate": 0.00029226304188096985, "loss": 4.005, "step": 1600 }, { "epoch": 0.3870811660820128, "eval_loss": Infinity, "eval_runtime": 524.4916, "eval_samples_per_second": 44.504, "eval_steps_per_second": 5.563, "eval_wer": 1.0, "step": 1600 }, { "epoch": 0.5806217491230192, "grad_norm": 0.0, "learning_rate": 0.0002864364437913299, "loss": 4.0676, "step": 2400 }, { "epoch": 0.5806217491230192, "eval_loss": Infinity, "eval_runtime": 522.6766, "eval_samples_per_second": 44.659, "eval_steps_per_second": 5.583, "eval_wer": 1.0, "step": 2400 }, { "epoch": 0.7741623321640256, "grad_norm": 0.0, "learning_rate": 0.0002806833210874357, "loss": 4.0978, "step": 3200 }, { "epoch": 0.7741623321640256, "eval_loss": Infinity, "eval_runtime": 524.1686, "eval_samples_per_second": 44.531, "eval_steps_per_second": 5.567, "eval_wer": 1.0, "step": 3200 }, { "epoch": 0.967702915205032, "grad_norm": 0.0, "learning_rate": 0.0002749595885378398, "loss": 4.1059, "step": 4000 }, { "epoch": 0.967702915205032, "eval_loss": Infinity, "eval_runtime": 525.0456, "eval_samples_per_second": 44.457, "eval_steps_per_second": 5.558, "eval_wer": 1.0, "step": 4000 }, { "epoch": 1.161122535381638, "grad_norm": 0.0, "learning_rate": 0.0002692064658339456, "loss": 4.0961, "step": 4800 }, { "epoch": 1.161122535381638, "eval_loss": Infinity, "eval_runtime": 525.4326, "eval_samples_per_second": 44.424, "eval_steps_per_second": 5.554, "eval_wer": 1.0, "step": 4800 }, { "epoch": 1.3546631184226443, "grad_norm": 0.0, "learning_rate": 0.00026350477590007343, "loss": 4.1137, "step": 5600 }, { "epoch": 1.3546631184226443, "eval_loss": Infinity, "eval_runtime": 524.7239, "eval_samples_per_second": 44.484, "eval_steps_per_second": 5.561, "eval_wer": 1.0, "step": 5600 }, { "epoch": 1.5482037014636507, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 7.5668, "step": 6400 }, { "epoch": 1.5482037014636507, "eval_loss": NaN, "eval_runtime": 525.2952, "eval_samples_per_second": 44.436, "eval_steps_per_second": 5.555, "eval_wer": 1.0, "step": 6400 }, { "epoch": 1.7417442845046571, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 7200 }, { "epoch": 1.7417442845046571, "eval_loss": NaN, "eval_runtime": 524.3577, "eval_samples_per_second": 44.515, "eval_steps_per_second": 5.565, "eval_wer": 1.0, "step": 7200 }, { "epoch": 1.9352848675456635, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 8000 }, { "epoch": 1.9352848675456635, "eval_loss": NaN, "eval_runtime": 524.5203, "eval_samples_per_second": 44.502, "eval_steps_per_second": 5.563, "eval_wer": 1.0, "step": 8000 }, { "epoch": 2.128704487722269, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 8800 }, { "epoch": 2.128704487722269, "eval_loss": NaN, "eval_runtime": 525.5034, "eval_samples_per_second": 44.418, "eval_steps_per_second": 5.553, "eval_wer": 1.0, "step": 8800 }, { "epoch": 2.322245070763276, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 9600 }, { "epoch": 2.322245070763276, "eval_loss": NaN, "eval_runtime": 524.5899, "eval_samples_per_second": 44.496, "eval_steps_per_second": 5.562, "eval_wer": 1.0, "step": 9600 }, { "epoch": 2.515785653804282, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 10400 }, { "epoch": 2.515785653804282, "eval_loss": NaN, "eval_runtime": 524.8508, "eval_samples_per_second": 44.474, "eval_steps_per_second": 5.56, "eval_wer": 1.0, "step": 10400 }, { "epoch": 2.7093262368452886, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 11200 }, { "epoch": 2.7093262368452886, "eval_loss": NaN, "eval_runtime": 524.5261, "eval_samples_per_second": 44.501, "eval_steps_per_second": 5.563, "eval_wer": 1.0, "step": 11200 }, { "epoch": 2.902866819886295, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 12000 }, { "epoch": 2.902866819886295, "eval_loss": NaN, "eval_runtime": 523.894, "eval_samples_per_second": 44.555, "eval_steps_per_second": 5.57, "eval_wer": 1.0, "step": 12000 }, { "epoch": 3.096286440062901, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 12800 }, { "epoch": 3.096286440062901, "eval_loss": NaN, "eval_runtime": 524.8229, "eval_samples_per_second": 44.476, "eval_steps_per_second": 5.56, "eval_wer": 1.0, "step": 12800 }, { "epoch": 3.289827023103907, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 13600 }, { "epoch": 3.289827023103907, "eval_loss": NaN, "eval_runtime": 523.5675, "eval_samples_per_second": 44.583, "eval_steps_per_second": 5.573, "eval_wer": 1.0, "step": 13600 }, { "epoch": 3.4833676061449137, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 14400 }, { "epoch": 3.4833676061449137, "eval_loss": NaN, "eval_runtime": 531.6114, "eval_samples_per_second": 43.908, "eval_steps_per_second": 5.489, "eval_wer": 1.0, "step": 14400 }, { "epoch": 3.67690818918592, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 15200 }, { "epoch": 3.67690818918592, "eval_loss": NaN, "eval_runtime": 544.415, "eval_samples_per_second": 42.875, "eval_steps_per_second": 5.36, "eval_wer": 1.0, "step": 15200 }, { "epoch": 3.8704487722269265, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 16000 }, { "epoch": 3.8704487722269265, "eval_loss": NaN, "eval_runtime": 540.5292, "eval_samples_per_second": 43.184, "eval_steps_per_second": 5.398, "eval_wer": 1.0, "step": 16000 }, { "epoch": 4.063868392403532, "grad_norm": NaN, "learning_rate": 0.0002604114621601763, "loss": 0.0, "step": 16800 }, { "epoch": 4.063868392403532, "eval_loss": NaN, "eval_runtime": 527.7152, "eval_samples_per_second": 44.232, "eval_steps_per_second": 5.529, "eval_wer": 1.0, "step": 16800 } ], "logging_steps": 800, "max_steps": 41330, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 800, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.7959210547118785e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }