|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 13.0, |
|
"eval_steps": 2179, |
|
"global_step": 28327, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3e-05, |
|
"loss": 2.2464, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0544, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9968, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9537, |
|
"step": 1744 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6693020521036767, |
|
"eval_loss": 1.5623117685317993, |
|
"eval_runtime": 220.2436, |
|
"eval_samples_per_second": 33.622, |
|
"eval_steps_per_second": 2.102, |
|
"step": 2179 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 10.24983119513842, |
|
"eval_f1": 15.257097040945366, |
|
"eval_qa_bleu": 5.8382901471491575, |
|
"eval_qa_exact_match": 0.09736664415935178, |
|
"eval_recite_bleu": 17.0930417063702, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 2179 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.9029, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5215, |
|
"step": 2616 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5077, |
|
"step": 3052 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5016, |
|
"step": 3488 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4734, |
|
"step": 3924 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6923618412861704, |
|
"eval_loss": 1.2099343538284302, |
|
"eval_runtime": 222.1542, |
|
"eval_samples_per_second": 33.333, |
|
"eval_steps_per_second": 2.084, |
|
"step": 4358 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 10.61444969615125, |
|
"eval_f1": 15.935672519396675, |
|
"eval_qa_bleu": 6.524750946372503, |
|
"eval_qa_exact_match": 0.10209318028359217, |
|
"eval_recite_bleu": 18.618913529891735, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 4358 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.4425, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0827, |
|
"step": 4796 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0876, |
|
"step": 5232 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0887, |
|
"step": 5668 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3e-05, |
|
"loss": 1.0665, |
|
"step": 6104 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7147465927772421, |
|
"eval_loss": 0.9177509546279907, |
|
"eval_runtime": 226.6441, |
|
"eval_samples_per_second": 32.672, |
|
"eval_steps_per_second": 2.043, |
|
"step": 6537 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 12.397029034436192, |
|
"eval_f1": 17.721572805797255, |
|
"eval_qa_bleu": 8.139751517788653, |
|
"eval_qa_exact_match": 0.11978392977717758, |
|
"eval_recite_bleu": 21.769859861022653, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 6537 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7772, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7688, |
|
"step": 6976 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7598, |
|
"step": 7412 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.77, |
|
"step": 7848 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7684, |
|
"step": 8284 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7331474586575765, |
|
"eval_loss": 0.6987847089767456, |
|
"eval_runtime": 221.7249, |
|
"eval_samples_per_second": 33.397, |
|
"eval_steps_per_second": 2.088, |
|
"step": 8716 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 13.409858203916272, |
|
"eval_f1": 19.326947132558665, |
|
"eval_qa_bleu": 8.79837393539171, |
|
"eval_qa_exact_match": 0.12869682646860228, |
|
"eval_recite_bleu": 23.848732661358156, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 8716 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.7559, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5354, |
|
"step": 9156 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5511, |
|
"step": 9592 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5496, |
|
"step": 10028 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.548, |
|
"step": 10464 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7465929589970037, |
|
"eval_loss": 0.5567022562026978, |
|
"eval_runtime": 226.4909, |
|
"eval_samples_per_second": 32.694, |
|
"eval_steps_per_second": 2.044, |
|
"step": 10895 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 14.220121539500338, |
|
"eval_f1": 20.736856637999697, |
|
"eval_qa_bleu": 9.542509177303282, |
|
"eval_qa_exact_match": 0.13652937204591492, |
|
"eval_recite_bleu": 27.315073242003514, |
|
"eval_recite_exact_match": 0.0009453072248480756, |
|
"step": 10895 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5518, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3837, |
|
"step": 11336 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3963, |
|
"step": 11772 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.404, |
|
"step": 12208 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4039, |
|
"step": 12644 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7551407100982113, |
|
"eval_loss": 0.4728190004825592, |
|
"eval_runtime": 223.8973, |
|
"eval_samples_per_second": 33.073, |
|
"eval_steps_per_second": 2.068, |
|
"step": 13074 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 16.12424037812289, |
|
"eval_f1": 23.0965386152669, |
|
"eval_qa_bleu": 10.706681466541065, |
|
"eval_qa_exact_match": 0.1524645509790682, |
|
"eval_recite_bleu": 31.825156559426375, |
|
"eval_recite_exact_match": 0.0036461850101282916, |
|
"step": 13074 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.405, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.285, |
|
"step": 13516 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2984, |
|
"step": 13952 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3051, |
|
"step": 14388 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3044, |
|
"step": 14824 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7600033806411041, |
|
"eval_loss": 0.4375591278076172, |
|
"eval_runtime": 225.3993, |
|
"eval_samples_per_second": 32.853, |
|
"eval_steps_per_second": 2.054, |
|
"step": 15253 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 16.948008102633356, |
|
"eval_f1": 23.931422337180113, |
|
"eval_qa_bleu": 11.914541362683222, |
|
"eval_qa_exact_match": 0.16029709655638083, |
|
"eval_recite_bleu": 35.91072342079755, |
|
"eval_recite_exact_match": 0.010938555030384874, |
|
"step": 15253 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2046, |
|
"step": 15260 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2265, |
|
"step": 15696 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.236, |
|
"step": 16132 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2399, |
|
"step": 16568 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2446, |
|
"step": 17004 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7628389388058522, |
|
"eval_loss": 0.42204150557518005, |
|
"eval_runtime": 223.5169, |
|
"eval_samples_per_second": 33.129, |
|
"eval_steps_per_second": 2.071, |
|
"step": 17432 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 18.433490884537473, |
|
"eval_f1": 25.859990277646197, |
|
"eval_qa_bleu": 13.293068057422849, |
|
"eval_qa_exact_match": 0.17609723160027008, |
|
"eval_recite_bleu": 41.25950959223269, |
|
"eval_recite_exact_match": 0.015530047265361242, |
|
"step": 17432 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2466, |
|
"step": 17440 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1887, |
|
"step": 17876 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1967, |
|
"step": 18312 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2026, |
|
"step": 18748 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2039, |
|
"step": 19184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7641864696201232, |
|
"eval_loss": 0.41897761821746826, |
|
"eval_runtime": 222.9387, |
|
"eval_samples_per_second": 33.215, |
|
"eval_steps_per_second": 2.077, |
|
"step": 19611 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 18.933153274814316, |
|
"eval_f1": 26.522905186169627, |
|
"eval_qa_bleu": 12.546175820192783, |
|
"eval_qa_exact_match": 0.1797434166103984, |
|
"eval_recite_bleu": 44.27793915167843, |
|
"eval_recite_exact_match": 0.02228224172856178, |
|
"step": 19611 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2067, |
|
"step": 19620 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1667, |
|
"step": 20056 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1709, |
|
"step": 20492 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1774, |
|
"step": 20928 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1787, |
|
"step": 21364 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7648945867029934, |
|
"eval_loss": 0.4250437319278717, |
|
"eval_runtime": 225.6304, |
|
"eval_samples_per_second": 32.819, |
|
"eval_steps_per_second": 2.052, |
|
"step": 21790 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 20.054017555705606, |
|
"eval_f1": 27.607870062960497, |
|
"eval_qa_bleu": 14.597537360428676, |
|
"eval_qa_exact_match": 0.1913571910871033, |
|
"eval_recite_bleu": 46.3428836838782, |
|
"eval_recite_exact_match": 0.029169480081026333, |
|
"step": 21790 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1829, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1498, |
|
"step": 22236 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1553, |
|
"step": 22672 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1612, |
|
"step": 23108 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1652, |
|
"step": 23544 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7653962302216591, |
|
"eval_loss": 0.42946067452430725, |
|
"eval_runtime": 227.2445, |
|
"eval_samples_per_second": 32.586, |
|
"eval_steps_per_second": 2.037, |
|
"step": 23969 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_exact_match": 19.581363943281566, |
|
"eval_f1": 27.25909178622117, |
|
"eval_qa_bleu": 14.227446461856255, |
|
"eval_qa_exact_match": 0.18595543551654287, |
|
"eval_recite_bleu": 46.797968452871594, |
|
"eval_recite_exact_match": 0.03430114787305875, |
|
"step": 23969 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1439, |
|
"step": 23980 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1402, |
|
"step": 24416 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1462, |
|
"step": 24852 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1492, |
|
"step": 25288 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 3e-05, |
|
"loss": 0.154, |
|
"step": 25724 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7654645700633325, |
|
"eval_loss": 0.43657177686691284, |
|
"eval_runtime": 220.7713, |
|
"eval_samples_per_second": 33.541, |
|
"eval_steps_per_second": 2.097, |
|
"step": 26148 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_exact_match": 19.945982444294394, |
|
"eval_f1": 27.398959656999555, |
|
"eval_qa_bleu": 13.50737485018698, |
|
"eval_qa_exact_match": 0.1886563133018231, |
|
"eval_recite_bleu": 47.72099251148126, |
|
"eval_recite_exact_match": 0.036326806212018906, |
|
"step": 26148 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1569, |
|
"step": 26160 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1344, |
|
"step": 26596 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 3e-05, |
|
"loss": 0.14, |
|
"step": 27032 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1427, |
|
"step": 27468 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1441, |
|
"step": 27904 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7656634099218181, |
|
"eval_loss": 0.44285184144973755, |
|
"eval_runtime": 222.075, |
|
"eval_samples_per_second": 33.345, |
|
"eval_steps_per_second": 2.085, |
|
"step": 28327 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_exact_match": 20.405131667792034, |
|
"eval_f1": 28.457441663021587, |
|
"eval_qa_bleu": 14.596564409389117, |
|
"eval_qa_exact_match": 0.19446320054017555, |
|
"eval_recite_bleu": 48.14075990339527, |
|
"eval_recite_exact_match": 0.03902768399729912, |
|
"step": 28327 |
|
} |
|
], |
|
"logging_steps": 436, |
|
"max_steps": 43580, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.3113812200077312e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|