{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.125, "grad_norm": 71.75464630126953, "learning_rate": 4.0000000000000003e-07, "loss": 3.61, "step": 10 }, { "epoch": 0.25, "grad_norm": 45.94756317138672, "learning_rate": 1.4000000000000001e-06, "loss": 3.3547, "step": 20 }, { "epoch": 0.375, "grad_norm": 29.31285858154297, "learning_rate": 2.4000000000000003e-06, "loss": 2.7402, "step": 30 }, { "epoch": 0.5, "grad_norm": 19.44002914428711, "learning_rate": 3.4000000000000005e-06, "loss": 2.12, "step": 40 }, { "epoch": 0.625, "grad_norm": 13.159948348999023, "learning_rate": 4.4e-06, "loss": 1.5923, "step": 50 }, { "epoch": 0.75, "grad_norm": 8.778341293334961, "learning_rate": 5.400000000000001e-06, "loss": 1.2181, "step": 60 }, { "epoch": 0.875, "grad_norm": 5.181447982788086, "learning_rate": 6.4000000000000006e-06, "loss": 0.9039, "step": 70 }, { "epoch": 1.0, "grad_norm": 5.2995686531066895, "learning_rate": 7.4e-06, "loss": 0.6798, "step": 80 }, { "epoch": 1.0, "eval_cer": 0.1788136402712771, "eval_loss": 0.5512658953666687, "eval_runtime": 167.9142, "eval_samples_per_second": 7.522, "eval_steps_per_second": 0.06, "eval_wer": 0.6854535695115406, "step": 80 }, { "epoch": 1.125, "grad_norm": 3.869173288345337, "learning_rate": 8.400000000000001e-06, "loss": 0.5485, "step": 90 }, { "epoch": 1.25, "grad_norm": 3.1130523681640625, "learning_rate": 9.4e-06, "loss": 0.4481, "step": 100 }, { "epoch": 1.375, "grad_norm": 2.2851545810699463, "learning_rate": 9.81818181818182e-06, "loss": 0.4103, "step": 110 }, { "epoch": 1.5, "grad_norm": 2.8455662727355957, "learning_rate": 9.363636363636365e-06, "loss": 0.37, "step": 120 }, { "epoch": 1.625, "grad_norm": 2.575657367706299, "learning_rate": 8.90909090909091e-06, "loss": 0.3513, "step": 130 }, { "epoch": 1.75, "grad_norm": 2.5928118228912354, "learning_rate": 8.454545454545455e-06, "loss": 0.3504, "step": 140 }, { "epoch": 1.875, "grad_norm": 2.5617828369140625, "learning_rate": 8.000000000000001e-06, "loss": 0.324, "step": 150 }, { "epoch": 2.0, "grad_norm": 3.5175580978393555, "learning_rate": 7.545454545454546e-06, "loss": 0.3095, "step": 160 }, { "epoch": 2.0, "eval_cer": 0.09716305282261917, "eval_loss": 0.2984148859977722, "eval_runtime": 164.9586, "eval_samples_per_second": 7.656, "eval_steps_per_second": 0.061, "eval_wer": 0.4486312399355878, "step": 160 }, { "epoch": 2.125, "grad_norm": 2.2015202045440674, "learning_rate": 7.0909090909090916e-06, "loss": 0.2926, "step": 170 }, { "epoch": 2.25, "grad_norm": 3.0814766883850098, "learning_rate": 6.6363636363636375e-06, "loss": 0.2836, "step": 180 }, { "epoch": 2.375, "grad_norm": 1.8548959493637085, "learning_rate": 6.181818181818182e-06, "loss": 0.2743, "step": 190 }, { "epoch": 2.5, "grad_norm": 2.3138978481292725, "learning_rate": 5.727272727272728e-06, "loss": 0.2707, "step": 200 }, { "epoch": 2.625, "grad_norm": 2.2470438480377197, "learning_rate": 5.272727272727273e-06, "loss": 0.2799, "step": 210 }, { "epoch": 2.75, "grad_norm": 2.5589115619659424, "learning_rate": 4.818181818181819e-06, "loss": 0.2669, "step": 220 }, { "epoch": 2.875, "grad_norm": 2.1574714183807373, "learning_rate": 4.363636363636364e-06, "loss": 0.2615, "step": 230 }, { "epoch": 3.0, "grad_norm": 3.022969961166382, "learning_rate": 3.90909090909091e-06, "loss": 0.2673, "step": 240 }, { "epoch": 3.0, "eval_cer": 0.0882032667876588, "eval_loss": 0.2675623595714569, "eval_runtime": 162.9283, "eval_samples_per_second": 7.752, "eval_steps_per_second": 0.061, "eval_wer": 0.4142780461621041, "step": 240 }, { "epoch": 3.125, "grad_norm": 1.893800973892212, "learning_rate": 3.454545454545455e-06, "loss": 0.2601, "step": 250 }, { "epoch": 3.25, "grad_norm": 2.2726192474365234, "learning_rate": 3e-06, "loss": 0.2575, "step": 260 }, { "epoch": 3.375, "grad_norm": 1.9179538488388062, "learning_rate": 2.5454545454545456e-06, "loss": 0.2375, "step": 270 }, { "epoch": 3.5, "grad_norm": 1.734628677368164, "learning_rate": 2.090909090909091e-06, "loss": 0.238, "step": 280 }, { "epoch": 3.625, "grad_norm": 1.9225579500198364, "learning_rate": 1.6363636363636365e-06, "loss": 0.237, "step": 290 }, { "epoch": 3.75, "grad_norm": 1.7918205261230469, "learning_rate": 1.181818181818182e-06, "loss": 0.2364, "step": 300 }, { "epoch": 3.875, "grad_norm": 2.021205186843872, "learning_rate": 7.272727272727273e-07, "loss": 0.2422, "step": 310 }, { "epoch": 4.0, "grad_norm": 2.901258707046509, "learning_rate": 2.7272727272727274e-07, "loss": 0.2428, "step": 320 }, { "epoch": 4.0, "eval_cer": 0.08564332792052727, "eval_loss": 0.2611912190914154, "eval_runtime": 163.7635, "eval_samples_per_second": 7.712, "eval_steps_per_second": 0.061, "eval_wer": 0.40128824476650565, "step": 320 }, { "epoch": 4.0, "step": 320, "total_flos": 2.6372074438656e+18, "train_loss": 0.7337436556816102, "train_runtime": 5396.4455, "train_samples_per_second": 7.535, "train_steps_per_second": 0.059 } ], "logging_steps": 10, "max_steps": 320, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.6372074438656e+18, "train_batch_size": 128, "trial_name": null, "trial_params": null }