|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 2179, |
|
"global_step": 8716, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3e-05, |
|
"loss": 2.2464, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0544, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9968, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9537, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6693020521036767, |
|
"eval_loss": 1.5623117685317993, |
|
"eval_runtime": 220.2436, |
|
"eval_samples_per_second": 33.622, |
|
"eval_steps_per_second": 2.102, |
|
"step": 2179 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 10.24983119513842, |
|
"eval_f1": 15.257097040945366, |
|
"eval_qa_bleu": 5.8382901471491575, |
|
"eval_qa_exact_match": 0.09736664415935178, |
|
"eval_recite_bleu": 17.0930417063702, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 2179 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9029, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5215, |
|
"step": 2616 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5077, |
|
"step": 3052 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5016, |
|
"step": 3488 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4734, |
|
"step": 3924 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6923618412861704, |
|
"eval_loss": 1.2099343538284302, |
|
"eval_runtime": 222.1542, |
|
"eval_samples_per_second": 33.333, |
|
"eval_steps_per_second": 2.084, |
|
"step": 4358 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 10.61444969615125, |
|
"eval_f1": 15.935672519396675, |
|
"eval_qa_bleu": 6.524750946372503, |
|
"eval_qa_exact_match": 0.10209318028359217, |
|
"eval_recite_bleu": 18.618913529891735, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 4358 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4425, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0827, |
|
"step": 4796 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0876, |
|
"step": 5232 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0887, |
|
"step": 5668 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0665, |
|
"step": 6104 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7147465927772421, |
|
"eval_loss": 0.9177509546279907, |
|
"eval_runtime": 226.6441, |
|
"eval_samples_per_second": 32.672, |
|
"eval_steps_per_second": 2.043, |
|
"step": 6537 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 12.397029034436192, |
|
"eval_f1": 17.721572805797255, |
|
"eval_qa_bleu": 8.139751517788653, |
|
"eval_qa_exact_match": 0.11978392977717758, |
|
"eval_recite_bleu": 21.769859861022653, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 6537 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7772, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7688, |
|
"step": 6976 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7598, |
|
"step": 7412 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.77, |
|
"step": 7848 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7684, |
|
"step": 8284 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7331474586575765, |
|
"eval_loss": 0.6987847089767456, |
|
"eval_runtime": 221.7249, |
|
"eval_samples_per_second": 33.397, |
|
"eval_steps_per_second": 2.088, |
|
"step": 8716 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 13.409858203916272, |
|
"eval_f1": 19.326947132558665, |
|
"eval_qa_bleu": 8.79837393539171, |
|
"eval_qa_exact_match": 0.12869682646860228, |
|
"eval_recite_bleu": 23.848732661358156, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 8716 |
|
} |
|
], |
|
"logging_steps": 436, |
|
"max_steps": 43580, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4.027988527859712e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|