{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9568302779420463,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 4.945002956830278e-05,
      "loss": 6.8243,
      "step": 100
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.885866351271437e-05,
      "loss": 4.1701,
      "step": 200
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.8267297457125965e-05,
      "loss": 3.6081,
      "step": 300
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.7675931401537554e-05,
      "loss": 3.3137,
      "step": 400
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.7084565345949144e-05,
      "loss": 3.0057,
      "step": 500
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.6493199290360734e-05,
      "loss": 2.7865,
      "step": 600
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.590183323477233e-05,
      "loss": 2.4937,
      "step": 700
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.531046717918392e-05,
      "loss": 2.2845,
      "step": 800
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.471910112359551e-05,
      "loss": 1.8935,
      "step": 900
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.41277350680071e-05,
      "loss": 1.6164,
      "step": 1000
    },
    {
      "epoch": 0.59,
      "eval_cer": 0.5792523669604962,
      "eval_loss": 1.4108693599700928,
      "eval_runtime": 78.7069,
      "eval_samples_per_second": 19.096,
      "eval_steps_per_second": 2.389,
      "step": 1000
    },
    {
      "epoch": 0.65,
      "learning_rate": 4.353636901241869e-05,
      "loss": 1.3669,
      "step": 1100
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.294500295683028e-05,
      "loss": 1.1989,
      "step": 1200
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.235363690124187e-05,
      "loss": 1.0658,
      "step": 1300
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.176227084565346e-05,
      "loss": 0.8932,
      "step": 1400
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.117090479006505e-05,
      "loss": 0.8178,
      "step": 1500
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.057953873447664e-05,
      "loss": 0.6796,
      "step": 1600
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.9988172678888235e-05,
      "loss": 0.6462,
      "step": 1700
    },
    {
      "epoch": 1.06,
      "learning_rate": 3.9396806623299825e-05,
      "loss": 0.4301,
      "step": 1800
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.8805440567711414e-05,
      "loss": 0.4144,
      "step": 1900
    },
    {
      "epoch": 1.18,
      "learning_rate": 3.8214074512123004e-05,
      "loss": 0.3434,
      "step": 2000
    },
    {
      "epoch": 1.18,
      "eval_cer": 0.2175971269996735,
      "eval_loss": 0.3876412510871887,
      "eval_runtime": 79.3343,
      "eval_samples_per_second": 18.945,
      "eval_steps_per_second": 2.37,
      "step": 2000
    },
    {
      "epoch": 1.24,
      "learning_rate": 3.7622708456534594e-05,
      "loss": 0.3263,
      "step": 2100
    },
    {
      "epoch": 1.3,
      "learning_rate": 3.703134240094619e-05,
      "loss": 0.2942,
      "step": 2200
    },
    {
      "epoch": 1.36,
      "learning_rate": 3.643997634535778e-05,
      "loss": 0.2743,
      "step": 2300
    },
    {
      "epoch": 1.42,
      "learning_rate": 3.584861028976937e-05,
      "loss": 0.2679,
      "step": 2400
    },
    {
      "epoch": 1.48,
      "learning_rate": 3.525724423418096e-05,
      "loss": 0.2553,
      "step": 2500
    },
    {
      "epoch": 1.54,
      "learning_rate": 3.466587817859255e-05,
      "loss": 0.2329,
      "step": 2600
    },
    {
      "epoch": 1.6,
      "learning_rate": 3.4074512123004146e-05,
      "loss": 0.171,
      "step": 2700
    },
    {
      "epoch": 1.66,
      "learning_rate": 3.3483146067415736e-05,
      "loss": 0.2264,
      "step": 2800
    },
    {
      "epoch": 1.71,
      "learning_rate": 3.2891780011827325e-05,
      "loss": 0.2091,
      "step": 2900
    },
    {
      "epoch": 1.77,
      "learning_rate": 3.2300413956238915e-05,
      "loss": 0.1679,
      "step": 3000
    },
    {
      "epoch": 1.77,
      "eval_cer": 0.11859288279464578,
      "eval_loss": 0.22618062794208527,
      "eval_runtime": 78.6862,
      "eval_samples_per_second": 19.101,
      "eval_steps_per_second": 2.389,
      "step": 3000
    },
    {
      "epoch": 1.83,
      "learning_rate": 3.17090479006505e-05,
      "loss": 0.1629,
      "step": 3100
    },
    {
      "epoch": 1.89,
      "learning_rate": 3.1117681845062095e-05,
      "loss": 0.1669,
      "step": 3200
    },
    {
      "epoch": 1.95,
      "learning_rate": 3.0526315789473684e-05,
      "loss": 0.1582,
      "step": 3300
    },
    {
      "epoch": 2.01,
      "learning_rate": 2.9934949733885274e-05,
      "loss": 0.1066,
      "step": 3400
    },
    {
      "epoch": 2.07,
      "learning_rate": 2.9343583678296864e-05,
      "loss": 0.08,
      "step": 3500
    },
    {
      "epoch": 2.13,
      "learning_rate": 2.875221762270846e-05,
      "loss": 0.0651,
      "step": 3600
    },
    {
      "epoch": 2.19,
      "learning_rate": 2.816085156712005e-05,
      "loss": 0.063,
      "step": 3700
    },
    {
      "epoch": 2.25,
      "learning_rate": 2.756948551153164e-05,
      "loss": 0.045,
      "step": 3800
    },
    {
      "epoch": 2.31,
      "learning_rate": 2.697811945594323e-05,
      "loss": 0.0691,
      "step": 3900
    },
    {
      "epoch": 2.37,
      "learning_rate": 2.638675340035482e-05,
      "loss": 0.0816,
      "step": 4000
    },
    {
      "epoch": 2.37,
      "eval_cer": 0.06341821743388834,
      "eval_loss": 0.12735812366008759,
      "eval_runtime": 79.0538,
      "eval_samples_per_second": 19.012,
      "eval_steps_per_second": 2.378,
      "step": 4000
    },
    {
      "epoch": 2.42,
      "learning_rate": 2.5795387344766413e-05,
      "loss": 0.0732,
      "step": 4100
    },
    {
      "epoch": 2.48,
      "learning_rate": 2.5204021289178002e-05,
      "loss": 0.0486,
      "step": 4200
    },
    {
      "epoch": 2.54,
      "learning_rate": 2.4612655233589592e-05,
      "loss": 0.0618,
      "step": 4300
    },
    {
      "epoch": 2.6,
      "learning_rate": 2.4021289178001182e-05,
      "loss": 0.0494,
      "step": 4400
    },
    {
      "epoch": 2.66,
      "learning_rate": 2.3429923122412775e-05,
      "loss": 0.0557,
      "step": 4500
    },
    {
      "epoch": 2.72,
      "learning_rate": 2.2838557066824365e-05,
      "loss": 0.045,
      "step": 4600
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.2247191011235958e-05,
      "loss": 0.0346,
      "step": 4700
    },
    {
      "epoch": 2.84,
      "learning_rate": 2.1655824955647548e-05,
      "loss": 0.0437,
      "step": 4800
    },
    {
      "epoch": 2.9,
      "learning_rate": 2.1064458900059137e-05,
      "loss": 0.0366,
      "step": 4900
    },
    {
      "epoch": 2.96,
      "learning_rate": 2.0473092844470727e-05,
      "loss": 0.0421,
      "step": 5000
    },
    {
      "epoch": 2.96,
      "eval_cer": 0.0381162259222984,
      "eval_loss": 0.08168739080429077,
      "eval_runtime": 78.8974,
      "eval_samples_per_second": 19.05,
      "eval_steps_per_second": 2.383,
      "step": 5000
    }
  ],
  "max_steps": 8455,
  "num_train_epochs": 5,
  "total_flos": 7.242528907589714e+18,
  "trial_name": null,
  "trial_params": null
}