|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.0, |
|
"eval_steps": 1452, |
|
"global_step": 10164, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3e-05, |
|
"loss": 2.2721, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0539, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0079, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9686, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.668149394347241, |
|
"eval_loss": 1.5662202835083008, |
|
"eval_runtime": 137.7572, |
|
"eval_samples_per_second": 36.296, |
|
"eval_steps_per_second": 2.272, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 9.04, |
|
"eval_f1": 13.087046780017364, |
|
"eval_qa_bleu": 5.297766450359973, |
|
"eval_qa_exact_match": 0.085, |
|
"eval_recite_bleu": 14.716037077738477, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 1452 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9076, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5211, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5039, |
|
"step": 2037 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4834, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4604, |
|
"step": 2619 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6914514131897712, |
|
"eval_loss": 1.206884741783142, |
|
"eval_runtime": 139.7305, |
|
"eval_samples_per_second": 35.783, |
|
"eval_steps_per_second": 2.24, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 11.64, |
|
"eval_f1": 16.917593370997537, |
|
"eval_qa_bleu": 7.627957176996819, |
|
"eval_qa_exact_match": 0.1106, |
|
"eval_recite_bleu": 18.356743056908567, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4368, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0705, |
|
"step": 3201 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0685, |
|
"step": 3492 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0554, |
|
"step": 3783 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0522, |
|
"step": 4074 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7148896366083446, |
|
"eval_loss": 0.9045500159263611, |
|
"eval_runtime": 141.6455, |
|
"eval_samples_per_second": 35.299, |
|
"eval_steps_per_second": 2.21, |
|
"step": 4356 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 12.84, |
|
"eval_f1": 18.555356874635468, |
|
"eval_qa_bleu": 8.395879921702353, |
|
"eval_qa_exact_match": 0.1188, |
|
"eval_recite_bleu": 20.824221134285214, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 4356 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0375, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7373, |
|
"step": 4656 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7497, |
|
"step": 4947 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7479, |
|
"step": 5238 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7455, |
|
"step": 5529 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7340508748317631, |
|
"eval_loss": 0.6843858957290649, |
|
"eval_runtime": 143.5169, |
|
"eval_samples_per_second": 34.839, |
|
"eval_steps_per_second": 2.181, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 13.44, |
|
"eval_f1": 19.418847592695993, |
|
"eval_qa_bleu": 8.731752840499793, |
|
"eval_qa_exact_match": 0.1256, |
|
"eval_recite_bleu": 24.11683390270685, |
|
"eval_recite_exact_match": 0.0002, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7226, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5117, |
|
"step": 6111 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5208, |
|
"step": 6402 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5271, |
|
"step": 6693 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5307, |
|
"step": 6984 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7474745625841185, |
|
"eval_loss": 0.5420497059822083, |
|
"eval_runtime": 144.0475, |
|
"eval_samples_per_second": 34.711, |
|
"eval_steps_per_second": 2.173, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 15.08, |
|
"eval_f1": 21.4491845879152, |
|
"eval_qa_bleu": 10.89471390420015, |
|
"eval_qa_exact_match": 0.1434, |
|
"eval_recite_bleu": 27.62075386163555, |
|
"eval_recite_exact_match": 0.0014, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5165, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3624, |
|
"step": 7566 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3748, |
|
"step": 7857 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 3e-05, |
|
"loss": 0.383, |
|
"step": 8148 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3796, |
|
"step": 8439 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7560010767160161, |
|
"eval_loss": 0.4608670473098755, |
|
"eval_runtime": 144.2314, |
|
"eval_samples_per_second": 34.667, |
|
"eval_steps_per_second": 2.17, |
|
"step": 8712 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 16.22, |
|
"eval_f1": 22.5212141898349, |
|
"eval_qa_bleu": 11.517856911949949, |
|
"eval_qa_exact_match": 0.1524, |
|
"eval_recite_bleu": 32.1802083615024, |
|
"eval_recite_exact_match": 0.0044, |
|
"step": 8712 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3803, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 3e-05, |
|
"loss": 0.27, |
|
"step": 9021 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 3e-05, |
|
"loss": 0.28, |
|
"step": 9312 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2889, |
|
"step": 9603 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2912, |
|
"step": 9894 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7603886944818304, |
|
"eval_loss": 0.43113213777542114, |
|
"eval_runtime": 144.1381, |
|
"eval_samples_per_second": 34.689, |
|
"eval_steps_per_second": 2.172, |
|
"step": 10164 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 17.64, |
|
"eval_f1": 24.23915265275915, |
|
"eval_qa_bleu": 12.404229060978194, |
|
"eval_qa_exact_match": 0.1648, |
|
"eval_recite_bleu": 37.23348279932964, |
|
"eval_recite_exact_match": 0.0102, |
|
"step": 10164 |
|
} |
|
], |
|
"logging_steps": 291, |
|
"max_steps": 29040, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4.641681443905536e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|