{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 200, "global_step": 5010, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3992015968063872, "eval_loss": 1.943410873413086, "eval_runtime": 41.2309, "eval_samples_per_second": 11.52, "eval_steps_per_second": 1.455, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.7984031936127745, "eval_loss": 0.3648405969142914, "eval_runtime": 41.3464, "eval_samples_per_second": 11.488, "eval_steps_per_second": 1.451, "eval_wer": 0.5411255411255411, "step": 400 }, { "epoch": 0.998003992015968, "grad_norm": 0.7569882273674011, "learning_rate": 0.00027568228105906314, "loss": 2.5692, "step": 500 }, { "epoch": 1.1976047904191618, "eval_loss": 0.3371403217315674, "eval_runtime": 40.9227, "eval_samples_per_second": 11.607, "eval_steps_per_second": 1.466, "eval_wer": 0.5174433409727527, "step": 600 }, { "epoch": 1.596806387225549, "eval_loss": 0.32294145226478577, "eval_runtime": 41.2686, "eval_samples_per_second": 11.51, "eval_steps_per_second": 1.454, "eval_wer": 0.5212630506748154, "step": 800 }, { "epoch": 1.996007984031936, "grad_norm": 0.6301750540733337, "learning_rate": 0.000245132382892057, "loss": 0.3941, "step": 1000 }, { "epoch": 1.996007984031936, "eval_loss": 0.31834524869918823, "eval_runtime": 40.8769, "eval_samples_per_second": 11.62, "eval_steps_per_second": 1.468, "eval_wer": 0.49146931499872676, "step": 1000 }, { "epoch": 2.3952095808383236, "eval_loss": 0.3067522943019867, "eval_runtime": 40.1904, "eval_samples_per_second": 11.819, "eval_steps_per_second": 1.493, "eval_wer": 0.5072574484339191, "step": 1200 }, { "epoch": 2.7944111776447107, "eval_loss": 0.30573877692222595, "eval_runtime": 40.3076, "eval_samples_per_second": 11.784, "eval_steps_per_second": 1.489, "eval_wer": 0.46880570409982175, "step": 1400 }, { "epoch": 2.9940119760479043, "grad_norm": 0.6246519088745117, "learning_rate": 0.0002145824847250509, "loss": 0.3502, "step": 1500 }, { "epoch": 3.193612774451098, "eval_loss": 0.30173251032829285, "eval_runtime": 40.844, "eval_samples_per_second": 11.63, "eval_steps_per_second": 1.469, "eval_wer": 0.47771836007130125, "step": 1600 }, { "epoch": 3.592814371257485, "eval_loss": 0.2904900014400482, "eval_runtime": 40.4163, "eval_samples_per_second": 11.753, "eval_steps_per_second": 1.485, "eval_wer": 0.46473134708428826, "step": 1800 }, { "epoch": 3.992015968063872, "grad_norm": 0.9133301973342896, "learning_rate": 0.00018403258655804477, "loss": 0.3253, "step": 2000 }, { "epoch": 3.992015968063872, "eval_loss": 0.2856718897819519, "eval_runtime": 40.3664, "eval_samples_per_second": 11.767, "eval_steps_per_second": 1.486, "eval_wer": 0.4685510567863509, "step": 2000 }, { "epoch": 4.391217564870259, "eval_loss": 0.28921443223953247, "eval_runtime": 40.4989, "eval_samples_per_second": 11.729, "eval_steps_per_second": 1.482, "eval_wer": 0.4601476954418131, "step": 2200 }, { "epoch": 4.790419161676647, "eval_loss": 0.28484851121902466, "eval_runtime": 40.2649, "eval_samples_per_second": 11.797, "eval_steps_per_second": 1.49, "eval_wer": 0.47593582887700536, "step": 2400 }, { "epoch": 4.99001996007984, "grad_norm": 0.7654047012329102, "learning_rate": 0.00015348268839103868, "loss": 0.3066, "step": 2500 }, { "epoch": 5.189620758483034, "eval_loss": 0.2800922989845276, "eval_runtime": 40.3508, "eval_samples_per_second": 11.772, "eval_steps_per_second": 1.487, "eval_wer": 0.44435956200662086, "step": 2600 }, { "epoch": 5.588822355289421, "eval_loss": 0.27517372369766235, "eval_runtime": 40.2103, "eval_samples_per_second": 11.813, "eval_steps_per_second": 1.492, "eval_wer": 0.4626941685765215, "step": 2800 }, { "epoch": 5.9880239520958085, "grad_norm": 0.9405556917190552, "learning_rate": 0.00012293279022403258, "loss": 0.2988, "step": 3000 }, { "epoch": 5.9880239520958085, "eval_loss": 0.28181877732276917, "eval_runtime": 40.5684, "eval_samples_per_second": 11.709, "eval_steps_per_second": 1.479, "eval_wer": 0.4614209320091673, "step": 3000 }, { "epoch": 6.387225548902196, "eval_loss": 0.27585282921791077, "eval_runtime": 41.0192, "eval_samples_per_second": 11.58, "eval_steps_per_second": 1.463, "eval_wer": 0.44435956200662086, "step": 3200 }, { "epoch": 6.786427145708583, "eval_loss": 0.27509135007858276, "eval_runtime": 40.7557, "eval_samples_per_second": 11.655, "eval_steps_per_second": 1.472, "eval_wer": 0.4382480264833206, "step": 3400 }, { "epoch": 6.986027944111776, "grad_norm": 0.8755282163619995, "learning_rate": 9.238289205702647e-05, "loss": 0.2877, "step": 3500 }, { "epoch": 7.18562874251497, "eval_loss": 0.2725882828235626, "eval_runtime": 40.5214, "eval_samples_per_second": 11.722, "eval_steps_per_second": 1.481, "eval_wer": 0.4471606824548001, "step": 3600 }, { "epoch": 7.584830339321357, "eval_loss": 0.27224990725517273, "eval_runtime": 40.7203, "eval_samples_per_second": 11.665, "eval_steps_per_second": 1.473, "eval_wer": 0.4484339190221543, "step": 3800 }, { "epoch": 7.984031936127744, "grad_norm": 0.6233875155448914, "learning_rate": 6.183299389002036e-05, "loss": 0.2812, "step": 4000 }, { "epoch": 7.984031936127744, "eval_loss": 0.2709678113460541, "eval_runtime": 40.5287, "eval_samples_per_second": 11.72, "eval_steps_per_second": 1.48, "eval_wer": 0.434428316781258, "step": 4000 }, { "epoch": 8.383233532934131, "eval_loss": 0.2734103798866272, "eval_runtime": 40.5, "eval_samples_per_second": 11.728, "eval_steps_per_second": 1.481, "eval_wer": 0.4410491469314999, "step": 4200 }, { "epoch": 8.782435129740518, "eval_loss": 0.27336403727531433, "eval_runtime": 40.7879, "eval_samples_per_second": 11.646, "eval_steps_per_second": 1.471, "eval_wer": 0.435956200662083, "step": 4400 }, { "epoch": 8.982035928143713, "grad_norm": 0.49290069937705994, "learning_rate": 3.128309572301426e-05, "loss": 0.2742, "step": 4500 }, { "epoch": 9.181636726546905, "eval_loss": 0.2758755087852478, "eval_runtime": 40.5476, "eval_samples_per_second": 11.715, "eval_steps_per_second": 1.48, "eval_wer": 0.43977591036414565, "step": 4600 }, { "epoch": 9.580838323353294, "eval_loss": 0.27402016520500183, "eval_runtime": 40.6331, "eval_samples_per_second": 11.69, "eval_steps_per_second": 1.477, "eval_wer": 0.4336643748408454, "step": 4800 }, { "epoch": 9.980039920159681, "grad_norm": 0.8148034811019897, "learning_rate": 7.331975560081466e-07, "loss": 0.2731, "step": 5000 }, { "epoch": 9.980039920159681, "eval_loss": 0.27219507098197937, "eval_runtime": 40.3217, "eval_samples_per_second": 11.78, "eval_steps_per_second": 1.488, "eval_wer": 0.4382480264833206, "step": 5000 }, { "epoch": 10.0, "step": 5010, "total_flos": 7.851078607918333e+18, "train_loss": 0.535597919037718, "train_runtime": 6681.5115, "train_samples_per_second": 5.997, "train_steps_per_second": 0.75 } ], "logging_steps": 500, "max_steps": 5010, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.851078607918333e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }