{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9568302779420463, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 4.945002956830278e-05, "loss": 6.8243, "step": 100 }, { "epoch": 0.12, "learning_rate": 4.885866351271437e-05, "loss": 4.1701, "step": 200 }, { "epoch": 0.18, "learning_rate": 4.8267297457125965e-05, "loss": 3.6081, "step": 300 }, { "epoch": 0.24, "learning_rate": 4.7675931401537554e-05, "loss": 3.3137, "step": 400 }, { "epoch": 0.3, "learning_rate": 4.7084565345949144e-05, "loss": 3.0057, "step": 500 }, { "epoch": 0.35, "learning_rate": 4.6493199290360734e-05, "loss": 2.7865, "step": 600 }, { "epoch": 0.41, "learning_rate": 4.590183323477233e-05, "loss": 2.4937, "step": 700 }, { "epoch": 0.47, "learning_rate": 4.531046717918392e-05, "loss": 2.2845, "step": 800 }, { "epoch": 0.53, "learning_rate": 4.471910112359551e-05, "loss": 1.8935, "step": 900 }, { "epoch": 0.59, "learning_rate": 4.41277350680071e-05, "loss": 1.6164, "step": 1000 }, { "epoch": 0.59, "eval_cer": 0.5792523669604962, "eval_loss": 1.4108693599700928, "eval_runtime": 78.7069, "eval_samples_per_second": 19.096, "eval_steps_per_second": 2.389, "step": 1000 }, { "epoch": 0.65, "learning_rate": 4.353636901241869e-05, "loss": 1.3669, "step": 1100 }, { "epoch": 0.71, "learning_rate": 4.294500295683028e-05, "loss": 1.1989, "step": 1200 }, { "epoch": 0.77, "learning_rate": 4.235363690124187e-05, "loss": 1.0658, "step": 1300 }, { "epoch": 0.83, "learning_rate": 4.176227084565346e-05, "loss": 0.8932, "step": 1400 }, { "epoch": 0.89, "learning_rate": 4.117090479006505e-05, "loss": 0.8178, "step": 1500 }, { "epoch": 0.95, "learning_rate": 4.057953873447664e-05, "loss": 0.6796, "step": 1600 }, { "epoch": 1.01, "learning_rate": 3.9988172678888235e-05, "loss": 0.6462, "step": 1700 }, { "epoch": 1.06, "learning_rate": 3.9396806623299825e-05, "loss": 0.4301, "step": 1800 }, { "epoch": 1.12, "learning_rate": 3.8805440567711414e-05, "loss": 0.4144, "step": 1900 }, { "epoch": 1.18, "learning_rate": 3.8214074512123004e-05, "loss": 0.3434, "step": 2000 }, { "epoch": 1.18, "eval_cer": 0.2175971269996735, "eval_loss": 0.3876412510871887, "eval_runtime": 79.3343, "eval_samples_per_second": 18.945, "eval_steps_per_second": 2.37, "step": 2000 }, { "epoch": 1.24, "learning_rate": 3.7622708456534594e-05, "loss": 0.3263, "step": 2100 }, { "epoch": 1.3, "learning_rate": 3.703134240094619e-05, "loss": 0.2942, "step": 2200 }, { "epoch": 1.36, "learning_rate": 3.643997634535778e-05, "loss": 0.2743, "step": 2300 }, { "epoch": 1.42, "learning_rate": 3.584861028976937e-05, "loss": 0.2679, "step": 2400 }, { "epoch": 1.48, "learning_rate": 3.525724423418096e-05, "loss": 0.2553, "step": 2500 }, { "epoch": 1.54, "learning_rate": 3.466587817859255e-05, "loss": 0.2329, "step": 2600 }, { "epoch": 1.6, "learning_rate": 3.4074512123004146e-05, "loss": 0.171, "step": 2700 }, { "epoch": 1.66, "learning_rate": 3.3483146067415736e-05, "loss": 0.2264, "step": 2800 }, { "epoch": 1.71, "learning_rate": 3.2891780011827325e-05, "loss": 0.2091, "step": 2900 }, { "epoch": 1.77, "learning_rate": 3.2300413956238915e-05, "loss": 0.1679, "step": 3000 }, { "epoch": 1.77, "eval_cer": 0.11859288279464578, "eval_loss": 0.22618062794208527, "eval_runtime": 78.6862, "eval_samples_per_second": 19.101, "eval_steps_per_second": 2.389, "step": 3000 }, { "epoch": 1.83, "learning_rate": 3.17090479006505e-05, "loss": 0.1629, "step": 3100 }, { "epoch": 1.89, "learning_rate": 3.1117681845062095e-05, "loss": 0.1669, "step": 3200 }, { "epoch": 1.95, "learning_rate": 3.0526315789473684e-05, "loss": 0.1582, "step": 3300 }, { "epoch": 2.01, "learning_rate": 2.9934949733885274e-05, "loss": 0.1066, "step": 3400 }, { "epoch": 2.07, "learning_rate": 2.9343583678296864e-05, "loss": 0.08, "step": 3500 }, { "epoch": 2.13, "learning_rate": 2.875221762270846e-05, "loss": 0.0651, "step": 3600 }, { "epoch": 2.19, "learning_rate": 2.816085156712005e-05, "loss": 0.063, "step": 3700 }, { "epoch": 2.25, "learning_rate": 2.756948551153164e-05, "loss": 0.045, "step": 3800 }, { "epoch": 2.31, "learning_rate": 2.697811945594323e-05, "loss": 0.0691, "step": 3900 }, { "epoch": 2.37, "learning_rate": 2.638675340035482e-05, "loss": 0.0816, "step": 4000 }, { "epoch": 2.37, "eval_cer": 0.06341821743388834, "eval_loss": 0.12735812366008759, "eval_runtime": 79.0538, "eval_samples_per_second": 19.012, "eval_steps_per_second": 2.378, "step": 4000 }, { "epoch": 2.42, "learning_rate": 2.5795387344766413e-05, "loss": 0.0732, "step": 4100 }, { "epoch": 2.48, "learning_rate": 2.5204021289178002e-05, "loss": 0.0486, "step": 4200 }, { "epoch": 2.54, "learning_rate": 2.4612655233589592e-05, "loss": 0.0618, "step": 4300 }, { "epoch": 2.6, "learning_rate": 2.4021289178001182e-05, "loss": 0.0494, "step": 4400 }, { "epoch": 2.66, "learning_rate": 2.3429923122412775e-05, "loss": 0.0557, "step": 4500 }, { "epoch": 2.72, "learning_rate": 2.2838557066824365e-05, "loss": 0.045, "step": 4600 }, { "epoch": 2.78, "learning_rate": 2.2247191011235958e-05, "loss": 0.0346, "step": 4700 }, { "epoch": 2.84, "learning_rate": 2.1655824955647548e-05, "loss": 0.0437, "step": 4800 }, { "epoch": 2.9, "learning_rate": 2.1064458900059137e-05, "loss": 0.0366, "step": 4900 }, { "epoch": 2.96, "learning_rate": 2.0473092844470727e-05, "loss": 0.0421, "step": 5000 }, { "epoch": 2.96, "eval_cer": 0.0381162259222984, "eval_loss": 0.08168739080429077, "eval_runtime": 78.8974, "eval_samples_per_second": 19.05, "eval_steps_per_second": 2.383, "step": 5000 } ], "max_steps": 8455, "num_train_epochs": 5, "total_flos": 7.242528907589714e+18, "trial_name": null, "trial_params": null }