{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 1452,
  "global_step": 29040,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "learning_rate": 3e-05,
      "loss": 2.2721,
      "step": 291
    },
    {
      "epoch": 0.4,
      "learning_rate": 3e-05,
      "loss": 2.0539,
      "step": 582
    },
    {
      "epoch": 0.6,
      "learning_rate": 3e-05,
      "loss": 2.0079,
      "step": 873
    },
    {
      "epoch": 0.8,
      "learning_rate": 3e-05,
      "loss": 1.9686,
      "step": 1164
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.668149394347241,
      "eval_loss": 1.5662202835083008,
      "eval_runtime": 137.7572,
      "eval_samples_per_second": 36.296,
      "eval_steps_per_second": 2.272,
      "step": 1452
    },
    {
      "epoch": 1.0,
      "eval_exact_match": 9.04,
      "eval_f1": 13.087046780017364,
      "eval_qa_bleu": 5.297766450359973,
      "eval_qa_exact_match": 0.085,
      "eval_recite_bleu": 14.716037077738477,
      "eval_recite_exact_match": 0.0,
      "step": 1452
    },
    {
      "epoch": 1.0,
      "learning_rate": 3e-05,
      "loss": 1.9076,
      "step": 1455
    },
    {
      "epoch": 1.2,
      "learning_rate": 3e-05,
      "loss": 1.5211,
      "step": 1746
    },
    {
      "epoch": 1.4,
      "learning_rate": 3e-05,
      "loss": 1.5039,
      "step": 2037
    },
    {
      "epoch": 1.6,
      "learning_rate": 3e-05,
      "loss": 1.4834,
      "step": 2328
    },
    {
      "epoch": 1.8,
      "learning_rate": 3e-05,
      "loss": 1.4604,
      "step": 2619
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6914514131897712,
      "eval_loss": 1.206884741783142,
      "eval_runtime": 139.7305,
      "eval_samples_per_second": 35.783,
      "eval_steps_per_second": 2.24,
      "step": 2904
    },
    {
      "epoch": 2.0,
      "eval_exact_match": 11.64,
      "eval_f1": 16.917593370997537,
      "eval_qa_bleu": 7.627957176996819,
      "eval_qa_exact_match": 0.1106,
      "eval_recite_bleu": 18.356743056908567,
      "eval_recite_exact_match": 0.0,
      "step": 2904
    },
    {
      "epoch": 2.0,
      "learning_rate": 3e-05,
      "loss": 1.4368,
      "step": 2910
    },
    {
      "epoch": 2.2,
      "learning_rate": 3e-05,
      "loss": 1.0705,
      "step": 3201
    },
    {
      "epoch": 2.4,
      "learning_rate": 3e-05,
      "loss": 1.0685,
      "step": 3492
    },
    {
      "epoch": 2.61,
      "learning_rate": 3e-05,
      "loss": 1.0554,
      "step": 3783
    },
    {
      "epoch": 2.81,
      "learning_rate": 3e-05,
      "loss": 1.0522,
      "step": 4074
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7148896366083446,
      "eval_loss": 0.9045500159263611,
      "eval_runtime": 141.6455,
      "eval_samples_per_second": 35.299,
      "eval_steps_per_second": 2.21,
      "step": 4356
    },
    {
      "epoch": 3.0,
      "eval_exact_match": 12.84,
      "eval_f1": 18.555356874635468,
      "eval_qa_bleu": 8.395879921702353,
      "eval_qa_exact_match": 0.1188,
      "eval_recite_bleu": 20.824221134285214,
      "eval_recite_exact_match": 0.0,
      "step": 4356
    },
    {
      "epoch": 3.01,
      "learning_rate": 3e-05,
      "loss": 1.0375,
      "step": 4365
    },
    {
      "epoch": 3.21,
      "learning_rate": 3e-05,
      "loss": 0.7373,
      "step": 4656
    },
    {
      "epoch": 3.41,
      "learning_rate": 3e-05,
      "loss": 0.7497,
      "step": 4947
    },
    {
      "epoch": 3.61,
      "learning_rate": 3e-05,
      "loss": 0.7479,
      "step": 5238
    },
    {
      "epoch": 3.81,
      "learning_rate": 3e-05,
      "loss": 0.7455,
      "step": 5529
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7340508748317631,
      "eval_loss": 0.6843858957290649,
      "eval_runtime": 143.5169,
      "eval_samples_per_second": 34.839,
      "eval_steps_per_second": 2.181,
      "step": 5808
    },
    {
      "epoch": 4.0,
      "eval_exact_match": 13.44,
      "eval_f1": 19.418847592695993,
      "eval_qa_bleu": 8.731752840499793,
      "eval_qa_exact_match": 0.1256,
      "eval_recite_bleu": 24.11683390270685,
      "eval_recite_exact_match": 0.0002,
      "step": 5808
    },
    {
      "epoch": 4.01,
      "learning_rate": 3e-05,
      "loss": 0.7226,
      "step": 5820
    },
    {
      "epoch": 4.21,
      "learning_rate": 3e-05,
      "loss": 0.5117,
      "step": 6111
    },
    {
      "epoch": 4.41,
      "learning_rate": 3e-05,
      "loss": 0.5208,
      "step": 6402
    },
    {
      "epoch": 4.61,
      "learning_rate": 3e-05,
      "loss": 0.5271,
      "step": 6693
    },
    {
      "epoch": 4.81,
      "learning_rate": 3e-05,
      "loss": 0.5307,
      "step": 6984
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7474745625841185,
      "eval_loss": 0.5420497059822083,
      "eval_runtime": 144.0475,
      "eval_samples_per_second": 34.711,
      "eval_steps_per_second": 2.173,
      "step": 7260
    },
    {
      "epoch": 5.0,
      "eval_exact_match": 15.08,
      "eval_f1": 21.4491845879152,
      "eval_qa_bleu": 10.89471390420015,
      "eval_qa_exact_match": 0.1434,
      "eval_recite_bleu": 27.62075386163555,
      "eval_recite_exact_match": 0.0014,
      "step": 7260
    },
    {
      "epoch": 5.01,
      "learning_rate": 3e-05,
      "loss": 0.5165,
      "step": 7275
    },
    {
      "epoch": 5.21,
      "learning_rate": 3e-05,
      "loss": 0.3624,
      "step": 7566
    },
    {
      "epoch": 5.41,
      "learning_rate": 3e-05,
      "loss": 0.3748,
      "step": 7857
    },
    {
      "epoch": 5.61,
      "learning_rate": 3e-05,
      "loss": 0.383,
      "step": 8148
    },
    {
      "epoch": 5.81,
      "learning_rate": 3e-05,
      "loss": 0.3796,
      "step": 8439
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7560010767160161,
      "eval_loss": 0.4608670473098755,
      "eval_runtime": 144.2314,
      "eval_samples_per_second": 34.667,
      "eval_steps_per_second": 2.17,
      "step": 8712
    },
    {
      "epoch": 6.0,
      "eval_exact_match": 16.22,
      "eval_f1": 22.5212141898349,
      "eval_qa_bleu": 11.517856911949949,
      "eval_qa_exact_match": 0.1524,
      "eval_recite_bleu": 32.1802083615024,
      "eval_recite_exact_match": 0.0044,
      "step": 8712
    },
    {
      "epoch": 6.01,
      "learning_rate": 3e-05,
      "loss": 0.3803,
      "step": 8730
    },
    {
      "epoch": 6.21,
      "learning_rate": 3e-05,
      "loss": 0.27,
      "step": 9021
    },
    {
      "epoch": 6.41,
      "learning_rate": 3e-05,
      "loss": 0.28,
      "step": 9312
    },
    {
      "epoch": 6.61,
      "learning_rate": 3e-05,
      "loss": 0.2889,
      "step": 9603
    },
    {
      "epoch": 6.81,
      "learning_rate": 3e-05,
      "loss": 0.2912,
      "step": 9894
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7603886944818304,
      "eval_loss": 0.43113213777542114,
      "eval_runtime": 144.1381,
      "eval_samples_per_second": 34.689,
      "eval_steps_per_second": 2.172,
      "step": 10164
    },
    {
      "epoch": 7.0,
      "eval_exact_match": 17.64,
      "eval_f1": 24.23915265275915,
      "eval_qa_bleu": 12.404229060978194,
      "eval_qa_exact_match": 0.1648,
      "eval_recite_bleu": 37.23348279932964,
      "eval_recite_exact_match": 0.0102,
      "step": 10164
    },
    {
      "epoch": 7.01,
      "learning_rate": 3e-05,
      "loss": 0.2835,
      "step": 10185
    },
    {
      "epoch": 7.21,
      "learning_rate": 3e-05,
      "loss": 0.2131,
      "step": 10476
    },
    {
      "epoch": 7.42,
      "learning_rate": 3e-05,
      "loss": 0.2213,
      "step": 10767
    },
    {
      "epoch": 7.62,
      "learning_rate": 3e-05,
      "loss": 0.226,
      "step": 11058
    },
    {
      "epoch": 7.82,
      "learning_rate": 3e-05,
      "loss": 0.2282,
      "step": 11349
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7626543741588157,
      "eval_loss": 0.41835975646972656,
      "eval_runtime": 144.0991,
      "eval_samples_per_second": 34.698,
      "eval_steps_per_second": 2.172,
      "step": 11616
    },
    {
      "epoch": 8.0,
      "eval_exact_match": 17.78,
      "eval_f1": 25.32648324184451,
      "eval_qa_bleu": 12.460459536013193,
      "eval_qa_exact_match": 0.1676,
      "eval_recite_bleu": 40.14518456576004,
      "eval_recite_exact_match": 0.0168,
      "step": 11616
    },
    {
      "epoch": 8.02,
      "learning_rate": 3e-05,
      "loss": 0.1645,
      "step": 11640
    },
    {
      "epoch": 8.22,
      "learning_rate": 3e-05,
      "loss": 0.1768,
      "step": 11931
    },
    {
      "epoch": 8.42,
      "learning_rate": 3e-05,
      "loss": 0.1851,
      "step": 12222
    },
    {
      "epoch": 8.62,
      "learning_rate": 3e-05,
      "loss": 0.1895,
      "step": 12513
    },
    {
      "epoch": 8.82,
      "learning_rate": 3e-05,
      "loss": 0.1905,
      "step": 12804
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7639776581426648,
      "eval_loss": 0.4136393666267395,
      "eval_runtime": 146.8079,
      "eval_samples_per_second": 34.058,
      "eval_steps_per_second": 2.132,
      "step": 13068
    },
    {
      "epoch": 9.0,
      "eval_exact_match": 18.58,
      "eval_f1": 26.144983963782675,
      "eval_qa_bleu": 12.723130940050861,
      "eval_qa_exact_match": 0.1766,
      "eval_recite_bleu": 44.36820281112791,
      "eval_recite_exact_match": 0.0212,
      "step": 13068
    },
    {
      "epoch": 9.02,
      "learning_rate": 3e-05,
      "loss": 0.1919,
      "step": 13095
    },
    {
      "epoch": 9.22,
      "learning_rate": 3e-05,
      "loss": 0.156,
      "step": 13386
    },
    {
      "epoch": 9.42,
      "learning_rate": 3e-05,
      "loss": 0.158,
      "step": 13677
    },
    {
      "epoch": 9.62,
      "learning_rate": 3e-05,
      "loss": 0.1656,
      "step": 13968
    },
    {
      "epoch": 9.82,
      "learning_rate": 3e-05,
      "loss": 0.1687,
      "step": 14259
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7647776581426649,
      "eval_loss": 0.41753506660461426,
      "eval_runtime": 149.0935,
      "eval_samples_per_second": 33.536,
      "eval_steps_per_second": 2.099,
      "step": 14520
    },
    {
      "epoch": 10.0,
      "eval_exact_match": 19.8,
      "eval_f1": 27.235079497903637,
      "eval_qa_bleu": 13.874335090146856,
      "eval_qa_exact_match": 0.1886,
      "eval_recite_bleu": 46.53984717815193,
      "eval_recite_exact_match": 0.0276,
      "step": 14520
    },
    {
      "epoch": 10.02,
      "learning_rate": 3e-05,
      "loss": 0.1684,
      "step": 14550
    },
    {
      "epoch": 10.22,
      "learning_rate": 3e-05,
      "loss": 0.1396,
      "step": 14841
    },
    {
      "epoch": 10.42,
      "learning_rate": 3e-05,
      "loss": 0.1457,
      "step": 15132
    },
    {
      "epoch": 10.62,
      "learning_rate": 3e-05,
      "loss": 0.1542,
      "step": 15423
    },
    {
      "epoch": 10.82,
      "learning_rate": 3e-05,
      "loss": 0.1553,
      "step": 15714
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7651154777927321,
      "eval_loss": 0.4211733937263489,
      "eval_runtime": 150.5664,
      "eval_samples_per_second": 33.208,
      "eval_steps_per_second": 2.079,
      "step": 15972
    },
    {
      "epoch": 11.0,
      "eval_exact_match": 19.1,
      "eval_f1": 26.696266939458862,
      "eval_qa_bleu": 14.962573389382232,
      "eval_qa_exact_match": 0.1814,
      "eval_recite_bleu": 46.889182609591046,
      "eval_recite_exact_match": 0.032,
      "step": 15972
    },
    {
      "epoch": 11.02,
      "learning_rate": 3e-05,
      "loss": 0.1549,
      "step": 16005
    },
    {
      "epoch": 11.22,
      "learning_rate": 3e-05,
      "loss": 0.1335,
      "step": 16296
    },
    {
      "epoch": 11.42,
      "learning_rate": 3e-05,
      "loss": 0.1381,
      "step": 16587
    },
    {
      "epoch": 11.62,
      "learning_rate": 3e-05,
      "loss": 0.143,
      "step": 16878
    },
    {
      "epoch": 11.82,
      "learning_rate": 3e-05,
      "loss": 0.1447,
      "step": 17169
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7653222072678331,
      "eval_loss": 0.4283375144004822,
      "eval_runtime": 152.1173,
      "eval_samples_per_second": 32.869,
      "eval_steps_per_second": 2.058,
      "step": 17424
    },
    {
      "epoch": 12.0,
      "eval_exact_match": 19.7,
      "eval_f1": 27.086380949947664,
      "eval_qa_bleu": 13.161442853987554,
      "eval_qa_exact_match": 0.1884,
      "eval_recite_bleu": 48.94647260503517,
      "eval_recite_exact_match": 0.0318,
      "step": 17424
    },
    {
      "epoch": 12.02,
      "learning_rate": 3e-05,
      "loss": 0.1449,
      "step": 17460
    },
    {
      "epoch": 12.23,
      "learning_rate": 3e-05,
      "loss": 0.1267,
      "step": 17751
    },
    {
      "epoch": 12.43,
      "learning_rate": 3e-05,
      "loss": 0.131,
      "step": 18042
    },
    {
      "epoch": 12.63,
      "learning_rate": 3e-05,
      "loss": 0.1366,
      "step": 18333
    },
    {
      "epoch": 12.83,
      "learning_rate": 3e-05,
      "loss": 0.1388,
      "step": 18624
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7655897711978465,
      "eval_loss": 0.4287155270576477,
      "eval_runtime": 152.6887,
      "eval_samples_per_second": 32.746,
      "eval_steps_per_second": 2.05,
      "step": 18876
    },
    {
      "epoch": 13.0,
      "eval_exact_match": 20.12,
      "eval_f1": 27.673234730738653,
      "eval_qa_bleu": 14.175044837001105,
      "eval_qa_exact_match": 0.191,
      "eval_recite_bleu": 49.79523539787241,
      "eval_recite_exact_match": 0.0446,
      "step": 18876
    },
    {
      "epoch": 13.03,
      "learning_rate": 3e-05,
      "loss": 0.1393,
      "step": 18915
    },
    {
      "epoch": 13.23,
      "learning_rate": 3e-05,
      "loss": 0.1243,
      "step": 19206
    },
    {
      "epoch": 13.43,
      "learning_rate": 3e-05,
      "loss": 0.1277,
      "step": 19497
    },
    {
      "epoch": 13.63,
      "learning_rate": 3e-05,
      "loss": 0.1324,
      "step": 19788
    },
    {
      "epoch": 13.83,
      "learning_rate": 3e-05,
      "loss": 0.1329,
      "step": 20079
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7656621803499327,
      "eval_loss": 0.4349338412284851,
      "eval_runtime": 153.0165,
      "eval_samples_per_second": 32.676,
      "eval_steps_per_second": 2.046,
      "step": 20328
    },
    {
      "epoch": 14.0,
      "eval_exact_match": 19.74,
      "eval_f1": 27.539494437309155,
      "eval_qa_bleu": 14.653456371114471,
      "eval_qa_exact_match": 0.1874,
      "eval_recite_bleu": 49.667204132453634,
      "eval_recite_exact_match": 0.0442,
      "step": 20328
    },
    {
      "epoch": 14.03,
      "learning_rate": 3e-05,
      "loss": 0.1324,
      "step": 20370
    },
    {
      "epoch": 14.23,
      "learning_rate": 3e-05,
      "loss": 0.1191,
      "step": 20661
    },
    {
      "epoch": 14.43,
      "learning_rate": 3e-05,
      "loss": 0.1228,
      "step": 20952
    },
    {
      "epoch": 14.63,
      "learning_rate": 3e-05,
      "loss": 0.1265,
      "step": 21243
    },
    {
      "epoch": 14.83,
      "learning_rate": 3e-05,
      "loss": 0.1292,
      "step": 21534
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7657300134589502,
      "eval_loss": 0.43529966473579407,
      "eval_runtime": 152.7159,
      "eval_samples_per_second": 32.741,
      "eval_steps_per_second": 2.05,
      "step": 21780
    },
    {
      "epoch": 15.0,
      "eval_exact_match": 20.14,
      "eval_f1": 28.059788808027182,
      "eval_qa_bleu": 14.397818558683056,
      "eval_qa_exact_match": 0.1928,
      "eval_recite_bleu": 49.57947130892122,
      "eval_recite_exact_match": 0.0444,
      "step": 21780
    },
    {
      "epoch": 15.03,
      "learning_rate": 3e-05,
      "loss": 0.1313,
      "step": 21825
    },
    {
      "epoch": 15.23,
      "learning_rate": 3e-05,
      "loss": 0.1185,
      "step": 22116
    },
    {
      "epoch": 15.43,
      "learning_rate": 3e-05,
      "loss": 0.1199,
      "step": 22407
    },
    {
      "epoch": 15.63,
      "learning_rate": 3e-05,
      "loss": 0.1226,
      "step": 22698
    },
    {
      "epoch": 15.83,
      "learning_rate": 3e-05,
      "loss": 0.1267,
      "step": 22989
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.7659200538358009,
      "eval_loss": 0.438266783952713,
      "eval_runtime": 153.2915,
      "eval_samples_per_second": 32.618,
      "eval_steps_per_second": 2.042,
      "step": 23232
    },
    {
      "epoch": 16.0,
      "eval_exact_match": 20.64,
      "eval_f1": 28.191361890575656,
      "eval_qa_bleu": 14.695055084130074,
      "eval_qa_exact_match": 0.1918,
      "eval_recite_bleu": 50.268257039645256,
      "eval_recite_exact_match": 0.0478,
      "step": 23232
    },
    {
      "epoch": 16.03,
      "learning_rate": 3e-05,
      "loss": 0.1133,
      "step": 23280
    },
    {
      "epoch": 16.23,
      "learning_rate": 3e-05,
      "loss": 0.1145,
      "step": 23571
    },
    {
      "epoch": 16.43,
      "learning_rate": 3e-05,
      "loss": 0.1174,
      "step": 23862
    },
    {
      "epoch": 16.63,
      "learning_rate": 3e-05,
      "loss": 0.1218,
      "step": 24153
    },
    {
      "epoch": 16.83,
      "learning_rate": 3e-05,
      "loss": 0.1251,
      "step": 24444
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7660632570659489,
      "eval_loss": 0.4416392147541046,
      "eval_runtime": 148.1371,
      "eval_samples_per_second": 33.753,
      "eval_steps_per_second": 2.113,
      "step": 24684
    },
    {
      "epoch": 17.0,
      "eval_exact_match": 20.62,
      "eval_f1": 28.101175315458292,
      "eval_qa_bleu": 14.458032503572724,
      "eval_qa_exact_match": 0.1966,
      "eval_recite_bleu": 51.2644528074332,
      "eval_recite_exact_match": 0.0496,
      "step": 24684
    },
    {
      "epoch": 17.04,
      "learning_rate": 3e-05,
      "loss": 0.1227,
      "step": 24735
    },
    {
      "epoch": 17.24,
      "learning_rate": 3e-05,
      "loss": 0.1117,
      "step": 25026
    },
    {
      "epoch": 17.44,
      "learning_rate": 3e-05,
      "loss": 0.1163,
      "step": 25317
    },
    {
      "epoch": 17.64,
      "learning_rate": 3e-05,
      "loss": 0.1209,
      "step": 25608
    },
    {
      "epoch": 17.84,
      "learning_rate": 3e-05,
      "loss": 0.1201,
      "step": 25899
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7658912516823688,
      "eval_loss": 0.4466552734375,
      "eval_runtime": 149.6031,
      "eval_samples_per_second": 33.422,
      "eval_steps_per_second": 2.092,
      "step": 26136
    },
    {
      "epoch": 18.0,
      "eval_exact_match": 19.72,
      "eval_f1": 27.705882516354148,
      "eval_qa_bleu": 13.41419171329321,
      "eval_qa_exact_match": 0.189,
      "eval_recite_bleu": 49.4786180883858,
      "eval_recite_exact_match": 0.0434,
      "step": 26136
    },
    {
      "epoch": 18.04,
      "learning_rate": 3e-05,
      "loss": 0.1215,
      "step": 26190
    },
    {
      "epoch": 18.24,
      "learning_rate": 3e-05,
      "loss": 0.1111,
      "step": 26481
    },
    {
      "epoch": 18.44,
      "learning_rate": 3e-05,
      "loss": 0.1164,
      "step": 26772
    },
    {
      "epoch": 18.64,
      "learning_rate": 3e-05,
      "loss": 0.1162,
      "step": 27063
    },
    {
      "epoch": 18.84,
      "learning_rate": 3e-05,
      "loss": 0.1186,
      "step": 27354
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7659983849259758,
      "eval_loss": 0.4507902264595032,
      "eval_runtime": 151.1093,
      "eval_samples_per_second": 33.089,
      "eval_steps_per_second": 2.071,
      "step": 27588
    },
    {
      "epoch": 19.0,
      "eval_exact_match": 20.78,
      "eval_f1": 28.407049206780048,
      "eval_qa_bleu": 14.348972564948328,
      "eval_qa_exact_match": 0.195,
      "eval_recite_bleu": 50.89618814085128,
      "eval_recite_exact_match": 0.0544,
      "step": 27588
    },
    {
      "epoch": 19.04,
      "learning_rate": 3e-05,
      "loss": 0.1198,
      "step": 27645
    },
    {
      "epoch": 19.24,
      "learning_rate": 3e-05,
      "loss": 0.109,
      "step": 27936
    },
    {
      "epoch": 19.44,
      "learning_rate": 3e-05,
      "loss": 0.1136,
      "step": 28227
    },
    {
      "epoch": 19.64,
      "learning_rate": 3e-05,
      "loss": 0.1153,
      "step": 28518
    },
    {
      "epoch": 19.84,
      "learning_rate": 3e-05,
      "loss": 0.1176,
      "step": 28809
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7660672947510094,
      "eval_loss": 0.4521976709365845,
      "eval_runtime": 152.6418,
      "eval_samples_per_second": 32.756,
      "eval_steps_per_second": 2.051,
      "step": 29040
    },
    {
      "epoch": 20.0,
      "eval_exact_match": 20.7,
      "eval_f1": 28.598247849322437,
      "eval_qa_bleu": 14.395309016288971,
      "eval_qa_exact_match": 0.1954,
      "eval_recite_bleu": 51.41424385892475,
      "eval_recite_exact_match": 0.0504,
      "step": 29040
    },
    {
      "epoch": 20.0,
      "step": 29040,
      "total_flos": 1.326010952896512e+18,
      "train_loss": 0.023505237591824914,
      "train_runtime": 14139.9896,
      "train_samples_per_second": 32.849,
      "train_steps_per_second": 2.054
    }
  ],
  "logging_steps": 291,
  "max_steps": 29040,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 1.326010952896512e+18,
  "trial_name": null,
  "trial_params": null
}