|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 93, |
|
"global_step": 1860, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001, |
|
"loss": 3.2142, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001, |
|
"loss": 2.3949, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001, |
|
"loss": 2.1979, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0001, |
|
"loss": 2.0096, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6793333333333333, |
|
"eval_loss": 1.380240559577942, |
|
"eval_runtime": 7.9893, |
|
"eval_samples_per_second": 37.55, |
|
"eval_steps_per_second": 2.378, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_exact_match": 3.3333333333333335, |
|
"eval_f1": 5.83015873015873, |
|
"eval_qa_bleu": 1.6970908056732688, |
|
"eval_qa_exact_match": 0.023333333333333334, |
|
"eval_recite_bleu": 8.149431756948522, |
|
"eval_recite_exact_match": 0.0, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7631, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9463, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8797, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8213, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7666, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7848627450980392, |
|
"eval_loss": 0.6686084270477295, |
|
"eval_runtime": 8.4979, |
|
"eval_samples_per_second": 35.303, |
|
"eval_steps_per_second": 2.236, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_exact_match": 4.666666666666667, |
|
"eval_f1": 7.87063492063492, |
|
"eval_qa_bleu": 4.158944460425248, |
|
"eval_qa_exact_match": 0.03333333333333333, |
|
"eval_recite_bleu": 12.887123438782695, |
|
"eval_recite_exact_match": 0.0033333333333333335, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5917, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3191, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3119, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3058, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.0001, |
|
"loss": 0.31, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8184183006535948, |
|
"eval_loss": 0.4719126522541046, |
|
"eval_runtime": 8.2778, |
|
"eval_samples_per_second": 36.242, |
|
"eval_steps_per_second": 2.295, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_exact_match": 8.0, |
|
"eval_f1": 11.731216931216931, |
|
"eval_qa_bleu": 3.2525404560535587, |
|
"eval_qa_exact_match": 0.06666666666666667, |
|
"eval_recite_bleu": 17.151706969674898, |
|
"eval_recite_exact_match": 0.006666666666666667, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2319, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1567, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.0001, |
|
"loss": 0.163, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1494, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1608, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8310980392156863, |
|
"eval_loss": 0.4038252830505371, |
|
"eval_runtime": 8.4283, |
|
"eval_samples_per_second": 35.594, |
|
"eval_steps_per_second": 2.254, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_exact_match": 9.666666666666666, |
|
"eval_f1": 13.638095238095238, |
|
"eval_qa_bleu": 11.09109473987401, |
|
"eval_qa_exact_match": 0.07, |
|
"eval_recite_bleu": 22.969082796041224, |
|
"eval_recite_exact_match": 0.02666666666666667, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.0001, |
|
"loss": 0.139, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1032, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1056, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1006, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1101, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8371764705882353, |
|
"eval_loss": 0.37421324849128723, |
|
"eval_runtime": 8.4581, |
|
"eval_samples_per_second": 35.469, |
|
"eval_steps_per_second": 2.246, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_exact_match": 9.0, |
|
"eval_f1": 13.622222222222222, |
|
"eval_qa_bleu": 10.209463809998487, |
|
"eval_qa_exact_match": 0.06, |
|
"eval_recite_bleu": 25.315439762957883, |
|
"eval_recite_exact_match": 0.07, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.0001, |
|
"loss": 0.097, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0835, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0924, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0816, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0839, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8392679738562091, |
|
"eval_loss": 0.3733816146850586, |
|
"eval_runtime": 8.3329, |
|
"eval_samples_per_second": 36.002, |
|
"eval_steps_per_second": 2.28, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_exact_match": 10.333333333333334, |
|
"eval_f1": 13.493121693121692, |
|
"eval_qa_bleu": 10.482710761607542, |
|
"eval_qa_exact_match": 0.06, |
|
"eval_recite_bleu": 26.495515647579698, |
|
"eval_recite_exact_match": 0.09, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0844, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.0001, |
|
"loss": 0.077, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0759, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0746, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0743, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8404183006535948, |
|
"eval_loss": 0.3625450134277344, |
|
"eval_runtime": 7.971, |
|
"eval_samples_per_second": 37.637, |
|
"eval_steps_per_second": 2.384, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_exact_match": 12.333333333333334, |
|
"eval_f1": 16.304761904761904, |
|
"eval_qa_bleu": 11.931512542100624, |
|
"eval_qa_exact_match": 0.09, |
|
"eval_recite_bleu": 28.955751208977897, |
|
"eval_recite_exact_match": 0.09666666666666666, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.0001, |
|
"loss": 0.069, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0653, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0678, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0711, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0756, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8399215686274509, |
|
"eval_loss": 0.36541667580604553, |
|
"eval_runtime": 8.1637, |
|
"eval_samples_per_second": 36.748, |
|
"eval_steps_per_second": 2.327, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_exact_match": 11.333333333333334, |
|
"eval_f1": 15.38253968253968, |
|
"eval_qa_bleu": 8.755756508741644, |
|
"eval_qa_exact_match": 0.09, |
|
"eval_recite_bleu": 28.742438290087694, |
|
"eval_recite_exact_match": 0.08666666666666667, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 0.0001, |
|
"loss": 0.068, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0665, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0679, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0703, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0694, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8399869281045752, |
|
"eval_loss": 0.37417492270469666, |
|
"eval_runtime": 8.3008, |
|
"eval_samples_per_second": 36.141, |
|
"eval_steps_per_second": 2.289, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_exact_match": 11.0, |
|
"eval_f1": 14.560317460317458, |
|
"eval_qa_bleu": 11.653366914054544, |
|
"eval_qa_exact_match": 0.07333333333333333, |
|
"eval_recite_bleu": 30.635594644467947, |
|
"eval_recite_exact_match": 0.09, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0652, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0643, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0662, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0669, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8402875816993464, |
|
"eval_loss": 0.37116506695747375, |
|
"eval_runtime": 8.3018, |
|
"eval_samples_per_second": 36.137, |
|
"eval_steps_per_second": 2.289, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_exact_match": 11.0, |
|
"eval_f1": 14.528571428571425, |
|
"eval_qa_bleu": 10.001071696850747, |
|
"eval_qa_exact_match": 0.07666666666666666, |
|
"eval_recite_bleu": 25.0280074067495, |
|
"eval_recite_exact_match": 0.07333333333333333, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0687, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0619, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0667, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0688, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0692, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8396993464052288, |
|
"eval_loss": 0.3811953365802765, |
|
"eval_runtime": 8.6273, |
|
"eval_samples_per_second": 34.773, |
|
"eval_steps_per_second": 2.202, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_exact_match": 8.0, |
|
"eval_f1": 12.665512265512264, |
|
"eval_qa_bleu": 9.029340244302077, |
|
"eval_qa_exact_match": 0.056666666666666664, |
|
"eval_recite_bleu": 27.692170109150023, |
|
"eval_recite_exact_match": 0.09, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0721, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0653, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 0.0001, |
|
"loss": 0.069, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0749, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0717, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8394901960784313, |
|
"eval_loss": 0.3796521723270416, |
|
"eval_runtime": 8.4545, |
|
"eval_samples_per_second": 35.484, |
|
"eval_steps_per_second": 2.247, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_exact_match": 11.666666666666666, |
|
"eval_f1": 15.368253968253963, |
|
"eval_qa_bleu": 14.544634955750682, |
|
"eval_qa_exact_match": 0.08666666666666667, |
|
"eval_recite_bleu": 28.00469452055886, |
|
"eval_recite_exact_match": 0.09666666666666666, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0747, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0671, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 0.0001, |
|
"loss": 0.076, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0793, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0762, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8392679738562091, |
|
"eval_loss": 0.3891879618167877, |
|
"eval_runtime": 8.3465, |
|
"eval_samples_per_second": 35.943, |
|
"eval_steps_per_second": 2.276, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_exact_match": 11.666666666666666, |
|
"eval_f1": 16.049206349206347, |
|
"eval_qa_bleu": 9.975889576329717, |
|
"eval_qa_exact_match": 0.09, |
|
"eval_recite_bleu": 28.604552918150617, |
|
"eval_recite_exact_match": 0.1, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 13.08, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0721, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0795, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0761, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 13.69, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0823, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0823, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8383790849673203, |
|
"eval_loss": 0.3992992043495178, |
|
"eval_runtime": 8.5283, |
|
"eval_samples_per_second": 35.177, |
|
"eval_steps_per_second": 2.228, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_exact_match": 9.666666666666666, |
|
"eval_f1": 12.499470899470898, |
|
"eval_qa_bleu": 8.034011406889757, |
|
"eval_qa_exact_match": 0.08333333333333333, |
|
"eval_recite_bleu": 27.04820432137949, |
|
"eval_recite_exact_match": 0.06333333333333334, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0756, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 14.3, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0731, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0795, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0797, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0789, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8389281045751634, |
|
"eval_loss": 0.3946473002433777, |
|
"eval_runtime": 8.3483, |
|
"eval_samples_per_second": 35.935, |
|
"eval_steps_per_second": 2.276, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_exact_match": 10.333333333333334, |
|
"eval_f1": 13.693650793650791, |
|
"eval_qa_bleu": 10.968521104903296, |
|
"eval_qa_exact_match": 0.08666666666666667, |
|
"eval_recite_bleu": 25.816615313359897, |
|
"eval_recite_exact_match": 0.08, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0728, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0695, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0699, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0771, |
|
"step": 1463 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0737, |
|
"step": 1482 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8392941176470589, |
|
"eval_loss": 0.39267826080322266, |
|
"eval_runtime": 7.9822, |
|
"eval_samples_per_second": 37.584, |
|
"eval_steps_per_second": 2.38, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_exact_match": 8.0, |
|
"eval_f1": 10.6, |
|
"eval_qa_bleu": 7.290561593538973, |
|
"eval_qa_exact_match": 0.06333333333333334, |
|
"eval_recite_bleu": 23.399757672969518, |
|
"eval_recite_exact_match": 0.07333333333333333, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 16.14, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0672, |
|
"step": 1501 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0686, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0719, |
|
"step": 1539 |
|
}, |
|
{ |
|
"epoch": 16.75, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0752, |
|
"step": 1558 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0739, |
|
"step": 1577 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8381045751633986, |
|
"eval_loss": 0.3977350890636444, |
|
"eval_runtime": 8.3772, |
|
"eval_samples_per_second": 35.811, |
|
"eval_steps_per_second": 2.268, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_exact_match": 11.0, |
|
"eval_f1": 14.11269841269841, |
|
"eval_qa_bleu": 11.007855081705829, |
|
"eval_qa_exact_match": 0.07333333333333333, |
|
"eval_recite_bleu": 29.431067552519558, |
|
"eval_recite_exact_match": 0.08, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0703, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0737, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0784, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 17.77, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0729, |
|
"step": 1653 |
|
}, |
|
{ |
|
"epoch": 17.98, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0741, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8378562091503268, |
|
"eval_loss": 0.40596938133239746, |
|
"eval_runtime": 8.2411, |
|
"eval_samples_per_second": 36.403, |
|
"eval_steps_per_second": 2.306, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_exact_match": 8.333333333333334, |
|
"eval_f1": 11.382539682539681, |
|
"eval_qa_bleu": 7.279783117269195, |
|
"eval_qa_exact_match": 0.056666666666666664, |
|
"eval_recite_bleu": 23.09547487104553, |
|
"eval_recite_exact_match": 0.07333333333333333, |
|
"step": 1674 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0737, |
|
"step": 1691 |
|
}, |
|
{ |
|
"epoch": 18.39, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0666, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0764, |
|
"step": 1729 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 0.0001, |
|
"loss": 0.072, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0741, |
|
"step": 1767 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8388888888888889, |
|
"eval_loss": 0.40468740463256836, |
|
"eval_runtime": 7.9826, |
|
"eval_samples_per_second": 37.582, |
|
"eval_steps_per_second": 2.38, |
|
"step": 1767 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_exact_match": 12.0, |
|
"eval_f1": 15.778571428571425, |
|
"eval_qa_bleu": 12.003446246092967, |
|
"eval_qa_exact_match": 0.08666666666666667, |
|
"eval_recite_bleu": 31.433479842615494, |
|
"eval_recite_exact_match": 0.11666666666666667, |
|
"step": 1767 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0622, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0684, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0721, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 19.82, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0715, |
|
"step": 1843 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8390196078431372, |
|
"eval_loss": 0.39820176362991333, |
|
"eval_runtime": 8.1892, |
|
"eval_samples_per_second": 36.634, |
|
"eval_steps_per_second": 2.32, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_exact_match": 11.333333333333334, |
|
"eval_f1": 14.327777777777778, |
|
"eval_qa_bleu": 10.816198494730774, |
|
"eval_qa_exact_match": 0.08, |
|
"eval_recite_bleu": 26.73749978175143, |
|
"eval_recite_exact_match": 0.1, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 1860, |
|
"total_flos": 4.95263869850112e+16, |
|
"train_loss": 0.24169133318367825, |
|
"train_runtime": 4163.8318, |
|
"train_samples_per_second": 7.123, |
|
"train_steps_per_second": 0.447 |
|
} |
|
], |
|
"logging_steps": 19, |
|
"max_steps": 1860, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 4.95263869850112e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|