ArOCR / last-checkpoint /trainer_state.json
gagan3012's picture
Training in progress, step 5000
6d942b4
raw
history blame
7.63 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9568302779420463,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 4.945002956830278e-05,
"loss": 6.8243,
"step": 100
},
{
"epoch": 0.12,
"learning_rate": 4.885866351271437e-05,
"loss": 4.1701,
"step": 200
},
{
"epoch": 0.18,
"learning_rate": 4.8267297457125965e-05,
"loss": 3.6081,
"step": 300
},
{
"epoch": 0.24,
"learning_rate": 4.7675931401537554e-05,
"loss": 3.3137,
"step": 400
},
{
"epoch": 0.3,
"learning_rate": 4.7084565345949144e-05,
"loss": 3.0057,
"step": 500
},
{
"epoch": 0.35,
"learning_rate": 4.6493199290360734e-05,
"loss": 2.7865,
"step": 600
},
{
"epoch": 0.41,
"learning_rate": 4.590183323477233e-05,
"loss": 2.4937,
"step": 700
},
{
"epoch": 0.47,
"learning_rate": 4.531046717918392e-05,
"loss": 2.2845,
"step": 800
},
{
"epoch": 0.53,
"learning_rate": 4.471910112359551e-05,
"loss": 1.8935,
"step": 900
},
{
"epoch": 0.59,
"learning_rate": 4.41277350680071e-05,
"loss": 1.6164,
"step": 1000
},
{
"epoch": 0.59,
"eval_cer": 0.5792523669604962,
"eval_loss": 1.4108693599700928,
"eval_runtime": 78.7069,
"eval_samples_per_second": 19.096,
"eval_steps_per_second": 2.389,
"step": 1000
},
{
"epoch": 0.65,
"learning_rate": 4.353636901241869e-05,
"loss": 1.3669,
"step": 1100
},
{
"epoch": 0.71,
"learning_rate": 4.294500295683028e-05,
"loss": 1.1989,
"step": 1200
},
{
"epoch": 0.77,
"learning_rate": 4.235363690124187e-05,
"loss": 1.0658,
"step": 1300
},
{
"epoch": 0.83,
"learning_rate": 4.176227084565346e-05,
"loss": 0.8932,
"step": 1400
},
{
"epoch": 0.89,
"learning_rate": 4.117090479006505e-05,
"loss": 0.8178,
"step": 1500
},
{
"epoch": 0.95,
"learning_rate": 4.057953873447664e-05,
"loss": 0.6796,
"step": 1600
},
{
"epoch": 1.01,
"learning_rate": 3.9988172678888235e-05,
"loss": 0.6462,
"step": 1700
},
{
"epoch": 1.06,
"learning_rate": 3.9396806623299825e-05,
"loss": 0.4301,
"step": 1800
},
{
"epoch": 1.12,
"learning_rate": 3.8805440567711414e-05,
"loss": 0.4144,
"step": 1900
},
{
"epoch": 1.18,
"learning_rate": 3.8214074512123004e-05,
"loss": 0.3434,
"step": 2000
},
{
"epoch": 1.18,
"eval_cer": 0.2175971269996735,
"eval_loss": 0.3876412510871887,
"eval_runtime": 79.3343,
"eval_samples_per_second": 18.945,
"eval_steps_per_second": 2.37,
"step": 2000
},
{
"epoch": 1.24,
"learning_rate": 3.7622708456534594e-05,
"loss": 0.3263,
"step": 2100
},
{
"epoch": 1.3,
"learning_rate": 3.703134240094619e-05,
"loss": 0.2942,
"step": 2200
},
{
"epoch": 1.36,
"learning_rate": 3.643997634535778e-05,
"loss": 0.2743,
"step": 2300
},
{
"epoch": 1.42,
"learning_rate": 3.584861028976937e-05,
"loss": 0.2679,
"step": 2400
},
{
"epoch": 1.48,
"learning_rate": 3.525724423418096e-05,
"loss": 0.2553,
"step": 2500
},
{
"epoch": 1.54,
"learning_rate": 3.466587817859255e-05,
"loss": 0.2329,
"step": 2600
},
{
"epoch": 1.6,
"learning_rate": 3.4074512123004146e-05,
"loss": 0.171,
"step": 2700
},
{
"epoch": 1.66,
"learning_rate": 3.3483146067415736e-05,
"loss": 0.2264,
"step": 2800
},
{
"epoch": 1.71,
"learning_rate": 3.2891780011827325e-05,
"loss": 0.2091,
"step": 2900
},
{
"epoch": 1.77,
"learning_rate": 3.2300413956238915e-05,
"loss": 0.1679,
"step": 3000
},
{
"epoch": 1.77,
"eval_cer": 0.11859288279464578,
"eval_loss": 0.22618062794208527,
"eval_runtime": 78.6862,
"eval_samples_per_second": 19.101,
"eval_steps_per_second": 2.389,
"step": 3000
},
{
"epoch": 1.83,
"learning_rate": 3.17090479006505e-05,
"loss": 0.1629,
"step": 3100
},
{
"epoch": 1.89,
"learning_rate": 3.1117681845062095e-05,
"loss": 0.1669,
"step": 3200
},
{
"epoch": 1.95,
"learning_rate": 3.0526315789473684e-05,
"loss": 0.1582,
"step": 3300
},
{
"epoch": 2.01,
"learning_rate": 2.9934949733885274e-05,
"loss": 0.1066,
"step": 3400
},
{
"epoch": 2.07,
"learning_rate": 2.9343583678296864e-05,
"loss": 0.08,
"step": 3500
},
{
"epoch": 2.13,
"learning_rate": 2.875221762270846e-05,
"loss": 0.0651,
"step": 3600
},
{
"epoch": 2.19,
"learning_rate": 2.816085156712005e-05,
"loss": 0.063,
"step": 3700
},
{
"epoch": 2.25,
"learning_rate": 2.756948551153164e-05,
"loss": 0.045,
"step": 3800
},
{
"epoch": 2.31,
"learning_rate": 2.697811945594323e-05,
"loss": 0.0691,
"step": 3900
},
{
"epoch": 2.37,
"learning_rate": 2.638675340035482e-05,
"loss": 0.0816,
"step": 4000
},
{
"epoch": 2.37,
"eval_cer": 0.06341821743388834,
"eval_loss": 0.12735812366008759,
"eval_runtime": 79.0538,
"eval_samples_per_second": 19.012,
"eval_steps_per_second": 2.378,
"step": 4000
},
{
"epoch": 2.42,
"learning_rate": 2.5795387344766413e-05,
"loss": 0.0732,
"step": 4100
},
{
"epoch": 2.48,
"learning_rate": 2.5204021289178002e-05,
"loss": 0.0486,
"step": 4200
},
{
"epoch": 2.54,
"learning_rate": 2.4612655233589592e-05,
"loss": 0.0618,
"step": 4300
},
{
"epoch": 2.6,
"learning_rate": 2.4021289178001182e-05,
"loss": 0.0494,
"step": 4400
},
{
"epoch": 2.66,
"learning_rate": 2.3429923122412775e-05,
"loss": 0.0557,
"step": 4500
},
{
"epoch": 2.72,
"learning_rate": 2.2838557066824365e-05,
"loss": 0.045,
"step": 4600
},
{
"epoch": 2.78,
"learning_rate": 2.2247191011235958e-05,
"loss": 0.0346,
"step": 4700
},
{
"epoch": 2.84,
"learning_rate": 2.1655824955647548e-05,
"loss": 0.0437,
"step": 4800
},
{
"epoch": 2.9,
"learning_rate": 2.1064458900059137e-05,
"loss": 0.0366,
"step": 4900
},
{
"epoch": 2.96,
"learning_rate": 2.0473092844470727e-05,
"loss": 0.0421,
"step": 5000
},
{
"epoch": 2.96,
"eval_cer": 0.0381162259222984,
"eval_loss": 0.08168739080429077,
"eval_runtime": 78.8974,
"eval_samples_per_second": 19.05,
"eval_steps_per_second": 2.383,
"step": 5000
}
],
"max_steps": 8455,
"num_train_epochs": 5,
"total_flos": 7.242528907589714e+18,
"trial_name": null,
"trial_params": null
}