|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.0, |
|
"eval_steps": 2179, |
|
"global_step": 15253, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3e-05, |
|
"loss": 2.2464, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0544, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9968, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9537, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6693020521036767, |
|
"eval_loss": 1.5623117685317993, |
|
"eval_runtime": 220.2436, |
|
"eval_samples_per_second": 33.622, |
|
"eval_steps_per_second": 2.102, |
|
"step": 2179 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 10.24983119513842, |
|
"eval_f1": 15.257097040945366, |
|
"eval_qa_bleu": 5.8382901471491575, |
|
"eval_qa_exact_match": 0.09736664415935178, |
|
"eval_recite_bleu": 17.0930417063702, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 2179 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9029, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5215, |
|
"step": 2616 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5077, |
|
"step": 3052 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5016, |
|
"step": 3488 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4734, |
|
"step": 3924 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6923618412861704, |
|
"eval_loss": 1.2099343538284302, |
|
"eval_runtime": 222.1542, |
|
"eval_samples_per_second": 33.333, |
|
"eval_steps_per_second": 2.084, |
|
"step": 4358 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 10.61444969615125, |
|
"eval_f1": 15.935672519396675, |
|
"eval_qa_bleu": 6.524750946372503, |
|
"eval_qa_exact_match": 0.10209318028359217, |
|
"eval_recite_bleu": 18.618913529891735, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 4358 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4425, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0827, |
|
"step": 4796 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0876, |
|
"step": 5232 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0887, |
|
"step": 5668 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0665, |
|
"step": 6104 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7147465927772421, |
|
"eval_loss": 0.9177509546279907, |
|
"eval_runtime": 226.6441, |
|
"eval_samples_per_second": 32.672, |
|
"eval_steps_per_second": 2.043, |
|
"step": 6537 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 12.397029034436192, |
|
"eval_f1": 17.721572805797255, |
|
"eval_qa_bleu": 8.139751517788653, |
|
"eval_qa_exact_match": 0.11978392977717758, |
|
"eval_recite_bleu": 21.769859861022653, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 6537 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7772, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7688, |
|
"step": 6976 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7598, |
|
"step": 7412 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.77, |
|
"step": 7848 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7684, |
|
"step": 8284 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7331474586575765, |
|
"eval_loss": 0.6987847089767456, |
|
"eval_runtime": 221.7249, |
|
"eval_samples_per_second": 33.397, |
|
"eval_steps_per_second": 2.088, |
|
"step": 8716 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 13.409858203916272, |
|
"eval_f1": 19.326947132558665, |
|
"eval_qa_bleu": 8.79837393539171, |
|
"eval_qa_exact_match": 0.12869682646860228, |
|
"eval_recite_bleu": 23.848732661358156, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 8716 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7559, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5354, |
|
"step": 9156 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5511, |
|
"step": 9592 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5496, |
|
"step": 10028 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.548, |
|
"step": 10464 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7465929589970037, |
|
"eval_loss": 0.5567022562026978, |
|
"eval_runtime": 226.4909, |
|
"eval_samples_per_second": 32.694, |
|
"eval_steps_per_second": 2.044, |
|
"step": 10895 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 14.220121539500338, |
|
"eval_f1": 20.736856637999697, |
|
"eval_qa_bleu": 9.542509177303282, |
|
"eval_qa_exact_match": 0.13652937204591492, |
|
"eval_recite_bleu": 27.315073242003514, |
|
"eval_recite_exact_match": 0.0009453072248480756, |
|
"step": 10895 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5518, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3837, |
|
"step": 11336 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3963, |
|
"step": 11772 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.404, |
|
"step": 12208 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4039, |
|
"step": 12644 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7551407100982113, |
|
"eval_loss": 0.4728190004825592, |
|
"eval_runtime": 223.8973, |
|
"eval_samples_per_second": 33.073, |
|
"eval_steps_per_second": 2.068, |
|
"step": 13074 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 16.12424037812289, |
|
"eval_f1": 23.0965386152669, |
|
"eval_qa_bleu": 10.706681466541065, |
|
"eval_qa_exact_match": 0.1524645509790682, |
|
"eval_recite_bleu": 31.825156559426375, |
|
"eval_recite_exact_match": 0.0036461850101282916, |
|
"step": 13074 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.405, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.285, |
|
"step": 13516 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2984, |
|
"step": 13952 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3051, |
|
"step": 14388 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3044, |
|
"step": 14824 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7600033806411041, |
|
"eval_loss": 0.4375591278076172, |
|
"eval_runtime": 225.3993, |
|
"eval_samples_per_second": 32.853, |
|
"eval_steps_per_second": 2.054, |
|
"step": 15253 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 16.948008102633356, |
|
"eval_f1": 23.931422337180113, |
|
"eval_qa_bleu": 11.914541362683222, |
|
"eval_qa_exact_match": 0.16029709655638083, |
|
"eval_recite_bleu": 35.91072342079755, |
|
"eval_recite_exact_match": 0.010938555030384874, |
|
"step": 15253 |
|
} |
|
], |
|
"logging_steps": 436, |
|
"max_steps": 43580, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7.053884690512896e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|