tyzhu's picture
Training in progress, epoch 7, checkpoint
af09793 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.0,
"eval_steps": 1452,
"global_step": 10164,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 3e-05,
"loss": 2.2721,
"step": 291
},
{
"epoch": 0.4,
"learning_rate": 3e-05,
"loss": 2.0539,
"step": 582
},
{
"epoch": 0.6,
"learning_rate": 3e-05,
"loss": 2.0079,
"step": 873
},
{
"epoch": 0.8,
"learning_rate": 3e-05,
"loss": 1.9686,
"step": 1164
},
{
"epoch": 1.0,
"eval_accuracy": 0.668149394347241,
"eval_loss": 1.5662202835083008,
"eval_runtime": 137.7572,
"eval_samples_per_second": 36.296,
"eval_steps_per_second": 2.272,
"step": 1452
},
{
"epoch": 1.0,
"eval_exact_match": 9.04,
"eval_f1": 13.087046780017364,
"eval_qa_bleu": 5.297766450359973,
"eval_qa_exact_match": 0.085,
"eval_recite_bleu": 14.716037077738477,
"eval_recite_exact_match": 0.0,
"step": 1452
},
{
"epoch": 1.0,
"learning_rate": 3e-05,
"loss": 1.9076,
"step": 1455
},
{
"epoch": 1.2,
"learning_rate": 3e-05,
"loss": 1.5211,
"step": 1746
},
{
"epoch": 1.4,
"learning_rate": 3e-05,
"loss": 1.5039,
"step": 2037
},
{
"epoch": 1.6,
"learning_rate": 3e-05,
"loss": 1.4834,
"step": 2328
},
{
"epoch": 1.8,
"learning_rate": 3e-05,
"loss": 1.4604,
"step": 2619
},
{
"epoch": 2.0,
"eval_accuracy": 0.6914514131897712,
"eval_loss": 1.206884741783142,
"eval_runtime": 139.7305,
"eval_samples_per_second": 35.783,
"eval_steps_per_second": 2.24,
"step": 2904
},
{
"epoch": 2.0,
"eval_exact_match": 11.64,
"eval_f1": 16.917593370997537,
"eval_qa_bleu": 7.627957176996819,
"eval_qa_exact_match": 0.1106,
"eval_recite_bleu": 18.356743056908567,
"eval_recite_exact_match": 0.0,
"step": 2904
},
{
"epoch": 2.0,
"learning_rate": 3e-05,
"loss": 1.4368,
"step": 2910
},
{
"epoch": 2.2,
"learning_rate": 3e-05,
"loss": 1.0705,
"step": 3201
},
{
"epoch": 2.4,
"learning_rate": 3e-05,
"loss": 1.0685,
"step": 3492
},
{
"epoch": 2.61,
"learning_rate": 3e-05,
"loss": 1.0554,
"step": 3783
},
{
"epoch": 2.81,
"learning_rate": 3e-05,
"loss": 1.0522,
"step": 4074
},
{
"epoch": 3.0,
"eval_accuracy": 0.7148896366083446,
"eval_loss": 0.9045500159263611,
"eval_runtime": 141.6455,
"eval_samples_per_second": 35.299,
"eval_steps_per_second": 2.21,
"step": 4356
},
{
"epoch": 3.0,
"eval_exact_match": 12.84,
"eval_f1": 18.555356874635468,
"eval_qa_bleu": 8.395879921702353,
"eval_qa_exact_match": 0.1188,
"eval_recite_bleu": 20.824221134285214,
"eval_recite_exact_match": 0.0,
"step": 4356
},
{
"epoch": 3.01,
"learning_rate": 3e-05,
"loss": 1.0375,
"step": 4365
},
{
"epoch": 3.21,
"learning_rate": 3e-05,
"loss": 0.7373,
"step": 4656
},
{
"epoch": 3.41,
"learning_rate": 3e-05,
"loss": 0.7497,
"step": 4947
},
{
"epoch": 3.61,
"learning_rate": 3e-05,
"loss": 0.7479,
"step": 5238
},
{
"epoch": 3.81,
"learning_rate": 3e-05,
"loss": 0.7455,
"step": 5529
},
{
"epoch": 4.0,
"eval_accuracy": 0.7340508748317631,
"eval_loss": 0.6843858957290649,
"eval_runtime": 143.5169,
"eval_samples_per_second": 34.839,
"eval_steps_per_second": 2.181,
"step": 5808
},
{
"epoch": 4.0,
"eval_exact_match": 13.44,
"eval_f1": 19.418847592695993,
"eval_qa_bleu": 8.731752840499793,
"eval_qa_exact_match": 0.1256,
"eval_recite_bleu": 24.11683390270685,
"eval_recite_exact_match": 0.0002,
"step": 5808
},
{
"epoch": 4.01,
"learning_rate": 3e-05,
"loss": 0.7226,
"step": 5820
},
{
"epoch": 4.21,
"learning_rate": 3e-05,
"loss": 0.5117,
"step": 6111
},
{
"epoch": 4.41,
"learning_rate": 3e-05,
"loss": 0.5208,
"step": 6402
},
{
"epoch": 4.61,
"learning_rate": 3e-05,
"loss": 0.5271,
"step": 6693
},
{
"epoch": 4.81,
"learning_rate": 3e-05,
"loss": 0.5307,
"step": 6984
},
{
"epoch": 5.0,
"eval_accuracy": 0.7474745625841185,
"eval_loss": 0.5420497059822083,
"eval_runtime": 144.0475,
"eval_samples_per_second": 34.711,
"eval_steps_per_second": 2.173,
"step": 7260
},
{
"epoch": 5.0,
"eval_exact_match": 15.08,
"eval_f1": 21.4491845879152,
"eval_qa_bleu": 10.89471390420015,
"eval_qa_exact_match": 0.1434,
"eval_recite_bleu": 27.62075386163555,
"eval_recite_exact_match": 0.0014,
"step": 7260
},
{
"epoch": 5.01,
"learning_rate": 3e-05,
"loss": 0.5165,
"step": 7275
},
{
"epoch": 5.21,
"learning_rate": 3e-05,
"loss": 0.3624,
"step": 7566
},
{
"epoch": 5.41,
"learning_rate": 3e-05,
"loss": 0.3748,
"step": 7857
},
{
"epoch": 5.61,
"learning_rate": 3e-05,
"loss": 0.383,
"step": 8148
},
{
"epoch": 5.81,
"learning_rate": 3e-05,
"loss": 0.3796,
"step": 8439
},
{
"epoch": 6.0,
"eval_accuracy": 0.7560010767160161,
"eval_loss": 0.4608670473098755,
"eval_runtime": 144.2314,
"eval_samples_per_second": 34.667,
"eval_steps_per_second": 2.17,
"step": 8712
},
{
"epoch": 6.0,
"eval_exact_match": 16.22,
"eval_f1": 22.5212141898349,
"eval_qa_bleu": 11.517856911949949,
"eval_qa_exact_match": 0.1524,
"eval_recite_bleu": 32.1802083615024,
"eval_recite_exact_match": 0.0044,
"step": 8712
},
{
"epoch": 6.01,
"learning_rate": 3e-05,
"loss": 0.3803,
"step": 8730
},
{
"epoch": 6.21,
"learning_rate": 3e-05,
"loss": 0.27,
"step": 9021
},
{
"epoch": 6.41,
"learning_rate": 3e-05,
"loss": 0.28,
"step": 9312
},
{
"epoch": 6.61,
"learning_rate": 3e-05,
"loss": 0.2889,
"step": 9603
},
{
"epoch": 6.81,
"learning_rate": 3e-05,
"loss": 0.2912,
"step": 9894
},
{
"epoch": 7.0,
"eval_accuracy": 0.7603886944818304,
"eval_loss": 0.43113213777542114,
"eval_runtime": 144.1381,
"eval_samples_per_second": 34.689,
"eval_steps_per_second": 2.172,
"step": 10164
},
{
"epoch": 7.0,
"eval_exact_match": 17.64,
"eval_f1": 24.23915265275915,
"eval_qa_bleu": 12.404229060978194,
"eval_qa_exact_match": 0.1648,
"eval_recite_bleu": 37.23348279932964,
"eval_recite_exact_match": 0.0102,
"step": 10164
}
],
"logging_steps": 291,
"max_steps": 29040,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 4.641681443905536e+17,
"trial_name": null,
"trial_params": null
}