tyzhu's picture
End of training
1f2ba55 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 93,
"global_step": 1860,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 0.0001,
"loss": 3.2142,
"step": 19
},
{
"epoch": 0.41,
"learning_rate": 0.0001,
"loss": 2.3949,
"step": 38
},
{
"epoch": 0.61,
"learning_rate": 0.0001,
"loss": 2.1979,
"step": 57
},
{
"epoch": 0.82,
"learning_rate": 0.0001,
"loss": 2.0096,
"step": 76
},
{
"epoch": 1.0,
"eval_accuracy": 0.6793333333333333,
"eval_loss": 1.380240559577942,
"eval_runtime": 7.9893,
"eval_samples_per_second": 37.55,
"eval_steps_per_second": 2.378,
"step": 93
},
{
"epoch": 1.0,
"eval_exact_match": 3.3333333333333335,
"eval_f1": 5.83015873015873,
"eval_qa_bleu": 1.6970908056732688,
"eval_qa_exact_match": 0.023333333333333334,
"eval_recite_bleu": 8.149431756948522,
"eval_recite_exact_match": 0.0,
"step": 93
},
{
"epoch": 1.02,
"learning_rate": 0.0001,
"loss": 1.7631,
"step": 95
},
{
"epoch": 1.23,
"learning_rate": 0.0001,
"loss": 0.9463,
"step": 114
},
{
"epoch": 1.43,
"learning_rate": 0.0001,
"loss": 0.8797,
"step": 133
},
{
"epoch": 1.63,
"learning_rate": 0.0001,
"loss": 0.8213,
"step": 152
},
{
"epoch": 1.84,
"learning_rate": 0.0001,
"loss": 0.7666,
"step": 171
},
{
"epoch": 2.0,
"eval_accuracy": 0.7848627450980392,
"eval_loss": 0.6686084270477295,
"eval_runtime": 8.4979,
"eval_samples_per_second": 35.303,
"eval_steps_per_second": 2.236,
"step": 186
},
{
"epoch": 2.0,
"eval_exact_match": 4.666666666666667,
"eval_f1": 7.87063492063492,
"eval_qa_bleu": 4.158944460425248,
"eval_qa_exact_match": 0.03333333333333333,
"eval_recite_bleu": 12.887123438782695,
"eval_recite_exact_match": 0.0033333333333333335,
"step": 186
},
{
"epoch": 2.04,
"learning_rate": 0.0001,
"loss": 0.5917,
"step": 190
},
{
"epoch": 2.25,
"learning_rate": 0.0001,
"loss": 0.3191,
"step": 209
},
{
"epoch": 2.45,
"learning_rate": 0.0001,
"loss": 0.3119,
"step": 228
},
{
"epoch": 2.66,
"learning_rate": 0.0001,
"loss": 0.3058,
"step": 247
},
{
"epoch": 2.86,
"learning_rate": 0.0001,
"loss": 0.31,
"step": 266
},
{
"epoch": 3.0,
"eval_accuracy": 0.8184183006535948,
"eval_loss": 0.4719126522541046,
"eval_runtime": 8.2778,
"eval_samples_per_second": 36.242,
"eval_steps_per_second": 2.295,
"step": 279
},
{
"epoch": 3.0,
"eval_exact_match": 8.0,
"eval_f1": 11.731216931216931,
"eval_qa_bleu": 3.2525404560535587,
"eval_qa_exact_match": 0.06666666666666667,
"eval_recite_bleu": 17.151706969674898,
"eval_recite_exact_match": 0.006666666666666667,
"step": 279
},
{
"epoch": 3.06,
"learning_rate": 0.0001,
"loss": 0.2319,
"step": 285
},
{
"epoch": 3.27,
"learning_rate": 0.0001,
"loss": 0.1567,
"step": 304
},
{
"epoch": 3.47,
"learning_rate": 0.0001,
"loss": 0.163,
"step": 323
},
{
"epoch": 3.68,
"learning_rate": 0.0001,
"loss": 0.1494,
"step": 342
},
{
"epoch": 3.88,
"learning_rate": 0.0001,
"loss": 0.1608,
"step": 361
},
{
"epoch": 4.0,
"eval_accuracy": 0.8310980392156863,
"eval_loss": 0.4038252830505371,
"eval_runtime": 8.4283,
"eval_samples_per_second": 35.594,
"eval_steps_per_second": 2.254,
"step": 372
},
{
"epoch": 4.0,
"eval_exact_match": 9.666666666666666,
"eval_f1": 13.638095238095238,
"eval_qa_bleu": 11.09109473987401,
"eval_qa_exact_match": 0.07,
"eval_recite_bleu": 22.969082796041224,
"eval_recite_exact_match": 0.02666666666666667,
"step": 372
},
{
"epoch": 4.09,
"learning_rate": 0.0001,
"loss": 0.139,
"step": 380
},
{
"epoch": 4.29,
"learning_rate": 0.0001,
"loss": 0.1032,
"step": 399
},
{
"epoch": 4.49,
"learning_rate": 0.0001,
"loss": 0.1056,
"step": 418
},
{
"epoch": 4.7,
"learning_rate": 0.0001,
"loss": 0.1006,
"step": 437
},
{
"epoch": 4.9,
"learning_rate": 0.0001,
"loss": 0.1101,
"step": 456
},
{
"epoch": 5.0,
"eval_accuracy": 0.8371764705882353,
"eval_loss": 0.37421324849128723,
"eval_runtime": 8.4581,
"eval_samples_per_second": 35.469,
"eval_steps_per_second": 2.246,
"step": 465
},
{
"epoch": 5.0,
"eval_exact_match": 9.0,
"eval_f1": 13.622222222222222,
"eval_qa_bleu": 10.209463809998487,
"eval_qa_exact_match": 0.06,
"eval_recite_bleu": 25.315439762957883,
"eval_recite_exact_match": 0.07,
"step": 465
},
{
"epoch": 5.11,
"learning_rate": 0.0001,
"loss": 0.097,
"step": 475
},
{
"epoch": 5.31,
"learning_rate": 0.0001,
"loss": 0.0835,
"step": 494
},
{
"epoch": 5.52,
"learning_rate": 0.0001,
"loss": 0.0924,
"step": 513
},
{
"epoch": 5.72,
"learning_rate": 0.0001,
"loss": 0.0816,
"step": 532
},
{
"epoch": 5.92,
"learning_rate": 0.0001,
"loss": 0.0839,
"step": 551
},
{
"epoch": 6.0,
"eval_accuracy": 0.8392679738562091,
"eval_loss": 0.3733816146850586,
"eval_runtime": 8.3329,
"eval_samples_per_second": 36.002,
"eval_steps_per_second": 2.28,
"step": 558
},
{
"epoch": 6.0,
"eval_exact_match": 10.333333333333334,
"eval_f1": 13.493121693121692,
"eval_qa_bleu": 10.482710761607542,
"eval_qa_exact_match": 0.06,
"eval_recite_bleu": 26.495515647579698,
"eval_recite_exact_match": 0.09,
"step": 558
},
{
"epoch": 6.13,
"learning_rate": 0.0001,
"loss": 0.0844,
"step": 570
},
{
"epoch": 6.33,
"learning_rate": 0.0001,
"loss": 0.077,
"step": 589
},
{
"epoch": 6.54,
"learning_rate": 0.0001,
"loss": 0.0759,
"step": 608
},
{
"epoch": 6.74,
"learning_rate": 0.0001,
"loss": 0.0746,
"step": 627
},
{
"epoch": 6.95,
"learning_rate": 0.0001,
"loss": 0.0743,
"step": 646
},
{
"epoch": 7.0,
"eval_accuracy": 0.8404183006535948,
"eval_loss": 0.3625450134277344,
"eval_runtime": 7.971,
"eval_samples_per_second": 37.637,
"eval_steps_per_second": 2.384,
"step": 651
},
{
"epoch": 7.0,
"eval_exact_match": 12.333333333333334,
"eval_f1": 16.304761904761904,
"eval_qa_bleu": 11.931512542100624,
"eval_qa_exact_match": 0.09,
"eval_recite_bleu": 28.955751208977897,
"eval_recite_exact_match": 0.09666666666666666,
"step": 651
},
{
"epoch": 7.15,
"learning_rate": 0.0001,
"loss": 0.069,
"step": 665
},
{
"epoch": 7.35,
"learning_rate": 0.0001,
"loss": 0.0653,
"step": 684
},
{
"epoch": 7.56,
"learning_rate": 0.0001,
"loss": 0.0678,
"step": 703
},
{
"epoch": 7.76,
"learning_rate": 0.0001,
"loss": 0.0711,
"step": 722
},
{
"epoch": 7.97,
"learning_rate": 0.0001,
"loss": 0.0756,
"step": 741
},
{
"epoch": 8.0,
"eval_accuracy": 0.8399215686274509,
"eval_loss": 0.36541667580604553,
"eval_runtime": 8.1637,
"eval_samples_per_second": 36.748,
"eval_steps_per_second": 2.327,
"step": 744
},
{
"epoch": 8.0,
"eval_exact_match": 11.333333333333334,
"eval_f1": 15.38253968253968,
"eval_qa_bleu": 8.755756508741644,
"eval_qa_exact_match": 0.09,
"eval_recite_bleu": 28.742438290087694,
"eval_recite_exact_match": 0.08666666666666667,
"step": 744
},
{
"epoch": 8.17,
"learning_rate": 0.0001,
"loss": 0.068,
"step": 760
},
{
"epoch": 8.38,
"learning_rate": 0.0001,
"loss": 0.0665,
"step": 779
},
{
"epoch": 8.58,
"learning_rate": 0.0001,
"loss": 0.0679,
"step": 798
},
{
"epoch": 8.78,
"learning_rate": 0.0001,
"loss": 0.0703,
"step": 817
},
{
"epoch": 8.99,
"learning_rate": 0.0001,
"loss": 0.0694,
"step": 836
},
{
"epoch": 9.0,
"eval_accuracy": 0.8399869281045752,
"eval_loss": 0.37417492270469666,
"eval_runtime": 8.3008,
"eval_samples_per_second": 36.141,
"eval_steps_per_second": 2.289,
"step": 837
},
{
"epoch": 9.0,
"eval_exact_match": 11.0,
"eval_f1": 14.560317460317458,
"eval_qa_bleu": 11.653366914054544,
"eval_qa_exact_match": 0.07333333333333333,
"eval_recite_bleu": 30.635594644467947,
"eval_recite_exact_match": 0.09,
"step": 837
},
{
"epoch": 9.19,
"learning_rate": 0.0001,
"loss": 0.0652,
"step": 855
},
{
"epoch": 9.4,
"learning_rate": 0.0001,
"loss": 0.0643,
"step": 874
},
{
"epoch": 9.6,
"learning_rate": 0.0001,
"loss": 0.0662,
"step": 893
},
{
"epoch": 9.81,
"learning_rate": 0.0001,
"loss": 0.0669,
"step": 912
},
{
"epoch": 10.0,
"eval_accuracy": 0.8402875816993464,
"eval_loss": 0.37116506695747375,
"eval_runtime": 8.3018,
"eval_samples_per_second": 36.137,
"eval_steps_per_second": 2.289,
"step": 930
},
{
"epoch": 10.0,
"eval_exact_match": 11.0,
"eval_f1": 14.528571428571425,
"eval_qa_bleu": 10.001071696850747,
"eval_qa_exact_match": 0.07666666666666666,
"eval_recite_bleu": 25.0280074067495,
"eval_recite_exact_match": 0.07333333333333333,
"step": 930
},
{
"epoch": 10.01,
"learning_rate": 0.0001,
"loss": 0.0687,
"step": 931
},
{
"epoch": 10.22,
"learning_rate": 0.0001,
"loss": 0.0619,
"step": 950
},
{
"epoch": 10.42,
"learning_rate": 0.0001,
"loss": 0.0667,
"step": 969
},
{
"epoch": 10.62,
"learning_rate": 0.0001,
"loss": 0.0688,
"step": 988
},
{
"epoch": 10.83,
"learning_rate": 0.0001,
"loss": 0.0692,
"step": 1007
},
{
"epoch": 11.0,
"eval_accuracy": 0.8396993464052288,
"eval_loss": 0.3811953365802765,
"eval_runtime": 8.6273,
"eval_samples_per_second": 34.773,
"eval_steps_per_second": 2.202,
"step": 1023
},
{
"epoch": 11.0,
"eval_exact_match": 8.0,
"eval_f1": 12.665512265512264,
"eval_qa_bleu": 9.029340244302077,
"eval_qa_exact_match": 0.056666666666666664,
"eval_recite_bleu": 27.692170109150023,
"eval_recite_exact_match": 0.09,
"step": 1023
},
{
"epoch": 11.03,
"learning_rate": 0.0001,
"loss": 0.0721,
"step": 1026
},
{
"epoch": 11.24,
"learning_rate": 0.0001,
"loss": 0.0653,
"step": 1045
},
{
"epoch": 11.44,
"learning_rate": 0.0001,
"loss": 0.069,
"step": 1064
},
{
"epoch": 11.65,
"learning_rate": 0.0001,
"loss": 0.0749,
"step": 1083
},
{
"epoch": 11.85,
"learning_rate": 0.0001,
"loss": 0.0717,
"step": 1102
},
{
"epoch": 12.0,
"eval_accuracy": 0.8394901960784313,
"eval_loss": 0.3796521723270416,
"eval_runtime": 8.4545,
"eval_samples_per_second": 35.484,
"eval_steps_per_second": 2.247,
"step": 1116
},
{
"epoch": 12.0,
"eval_exact_match": 11.666666666666666,
"eval_f1": 15.368253968253963,
"eval_qa_bleu": 14.544634955750682,
"eval_qa_exact_match": 0.08666666666666667,
"eval_recite_bleu": 28.00469452055886,
"eval_recite_exact_match": 0.09666666666666666,
"step": 1116
},
{
"epoch": 12.05,
"learning_rate": 0.0001,
"loss": 0.0747,
"step": 1121
},
{
"epoch": 12.26,
"learning_rate": 0.0001,
"loss": 0.0671,
"step": 1140
},
{
"epoch": 12.46,
"learning_rate": 0.0001,
"loss": 0.076,
"step": 1159
},
{
"epoch": 12.67,
"learning_rate": 0.0001,
"loss": 0.0793,
"step": 1178
},
{
"epoch": 12.87,
"learning_rate": 0.0001,
"loss": 0.0762,
"step": 1197
},
{
"epoch": 13.0,
"eval_accuracy": 0.8392679738562091,
"eval_loss": 0.3891879618167877,
"eval_runtime": 8.3465,
"eval_samples_per_second": 35.943,
"eval_steps_per_second": 2.276,
"step": 1209
},
{
"epoch": 13.0,
"eval_exact_match": 11.666666666666666,
"eval_f1": 16.049206349206347,
"eval_qa_bleu": 9.975889576329717,
"eval_qa_exact_match": 0.09,
"eval_recite_bleu": 28.604552918150617,
"eval_recite_exact_match": 0.1,
"step": 1209
},
{
"epoch": 13.08,
"learning_rate": 0.0001,
"loss": 0.0721,
"step": 1216
},
{
"epoch": 13.28,
"learning_rate": 0.0001,
"loss": 0.0795,
"step": 1235
},
{
"epoch": 13.48,
"learning_rate": 0.0001,
"loss": 0.0761,
"step": 1254
},
{
"epoch": 13.69,
"learning_rate": 0.0001,
"loss": 0.0823,
"step": 1273
},
{
"epoch": 13.89,
"learning_rate": 0.0001,
"loss": 0.0823,
"step": 1292
},
{
"epoch": 14.0,
"eval_accuracy": 0.8383790849673203,
"eval_loss": 0.3992992043495178,
"eval_runtime": 8.5283,
"eval_samples_per_second": 35.177,
"eval_steps_per_second": 2.228,
"step": 1302
},
{
"epoch": 14.0,
"eval_exact_match": 9.666666666666666,
"eval_f1": 12.499470899470898,
"eval_qa_bleu": 8.034011406889757,
"eval_qa_exact_match": 0.08333333333333333,
"eval_recite_bleu": 27.04820432137949,
"eval_recite_exact_match": 0.06333333333333334,
"step": 1302
},
{
"epoch": 14.1,
"learning_rate": 0.0001,
"loss": 0.0756,
"step": 1311
},
{
"epoch": 14.3,
"learning_rate": 0.0001,
"loss": 0.0731,
"step": 1330
},
{
"epoch": 14.51,
"learning_rate": 0.0001,
"loss": 0.0795,
"step": 1349
},
{
"epoch": 14.71,
"learning_rate": 0.0001,
"loss": 0.0797,
"step": 1368
},
{
"epoch": 14.91,
"learning_rate": 0.0001,
"loss": 0.0789,
"step": 1387
},
{
"epoch": 15.0,
"eval_accuracy": 0.8389281045751634,
"eval_loss": 0.3946473002433777,
"eval_runtime": 8.3483,
"eval_samples_per_second": 35.935,
"eval_steps_per_second": 2.276,
"step": 1395
},
{
"epoch": 15.0,
"eval_exact_match": 10.333333333333334,
"eval_f1": 13.693650793650791,
"eval_qa_bleu": 10.968521104903296,
"eval_qa_exact_match": 0.08666666666666667,
"eval_recite_bleu": 25.816615313359897,
"eval_recite_exact_match": 0.08,
"step": 1395
},
{
"epoch": 15.12,
"learning_rate": 0.0001,
"loss": 0.0728,
"step": 1406
},
{
"epoch": 15.32,
"learning_rate": 0.0001,
"loss": 0.0695,
"step": 1425
},
{
"epoch": 15.53,
"learning_rate": 0.0001,
"loss": 0.0699,
"step": 1444
},
{
"epoch": 15.73,
"learning_rate": 0.0001,
"loss": 0.0771,
"step": 1463
},
{
"epoch": 15.94,
"learning_rate": 0.0001,
"loss": 0.0737,
"step": 1482
},
{
"epoch": 16.0,
"eval_accuracy": 0.8392941176470589,
"eval_loss": 0.39267826080322266,
"eval_runtime": 7.9822,
"eval_samples_per_second": 37.584,
"eval_steps_per_second": 2.38,
"step": 1488
},
{
"epoch": 16.0,
"eval_exact_match": 8.0,
"eval_f1": 10.6,
"eval_qa_bleu": 7.290561593538973,
"eval_qa_exact_match": 0.06333333333333334,
"eval_recite_bleu": 23.399757672969518,
"eval_recite_exact_match": 0.07333333333333333,
"step": 1488
},
{
"epoch": 16.14,
"learning_rate": 0.0001,
"loss": 0.0672,
"step": 1501
},
{
"epoch": 16.34,
"learning_rate": 0.0001,
"loss": 0.0686,
"step": 1520
},
{
"epoch": 16.55,
"learning_rate": 0.0001,
"loss": 0.0719,
"step": 1539
},
{
"epoch": 16.75,
"learning_rate": 0.0001,
"loss": 0.0752,
"step": 1558
},
{
"epoch": 16.96,
"learning_rate": 0.0001,
"loss": 0.0739,
"step": 1577
},
{
"epoch": 17.0,
"eval_accuracy": 0.8381045751633986,
"eval_loss": 0.3977350890636444,
"eval_runtime": 8.3772,
"eval_samples_per_second": 35.811,
"eval_steps_per_second": 2.268,
"step": 1581
},
{
"epoch": 17.0,
"eval_exact_match": 11.0,
"eval_f1": 14.11269841269841,
"eval_qa_bleu": 11.007855081705829,
"eval_qa_exact_match": 0.07333333333333333,
"eval_recite_bleu": 29.431067552519558,
"eval_recite_exact_match": 0.08,
"step": 1581
},
{
"epoch": 17.16,
"learning_rate": 0.0001,
"loss": 0.0703,
"step": 1596
},
{
"epoch": 17.37,
"learning_rate": 0.0001,
"loss": 0.0737,
"step": 1615
},
{
"epoch": 17.57,
"learning_rate": 0.0001,
"loss": 0.0784,
"step": 1634
},
{
"epoch": 17.77,
"learning_rate": 0.0001,
"loss": 0.0729,
"step": 1653
},
{
"epoch": 17.98,
"learning_rate": 0.0001,
"loss": 0.0741,
"step": 1672
},
{
"epoch": 18.0,
"eval_accuracy": 0.8378562091503268,
"eval_loss": 0.40596938133239746,
"eval_runtime": 8.2411,
"eval_samples_per_second": 36.403,
"eval_steps_per_second": 2.306,
"step": 1674
},
{
"epoch": 18.0,
"eval_exact_match": 8.333333333333334,
"eval_f1": 11.382539682539681,
"eval_qa_bleu": 7.279783117269195,
"eval_qa_exact_match": 0.056666666666666664,
"eval_recite_bleu": 23.09547487104553,
"eval_recite_exact_match": 0.07333333333333333,
"step": 1674
},
{
"epoch": 18.18,
"learning_rate": 0.0001,
"loss": 0.0737,
"step": 1691
},
{
"epoch": 18.39,
"learning_rate": 0.0001,
"loss": 0.0666,
"step": 1710
},
{
"epoch": 18.59,
"learning_rate": 0.0001,
"loss": 0.0764,
"step": 1729
},
{
"epoch": 18.8,
"learning_rate": 0.0001,
"loss": 0.072,
"step": 1748
},
{
"epoch": 19.0,
"learning_rate": 0.0001,
"loss": 0.0741,
"step": 1767
},
{
"epoch": 19.0,
"eval_accuracy": 0.8388888888888889,
"eval_loss": 0.40468740463256836,
"eval_runtime": 7.9826,
"eval_samples_per_second": 37.582,
"eval_steps_per_second": 2.38,
"step": 1767
},
{
"epoch": 19.0,
"eval_exact_match": 12.0,
"eval_f1": 15.778571428571425,
"eval_qa_bleu": 12.003446246092967,
"eval_qa_exact_match": 0.08666666666666667,
"eval_recite_bleu": 31.433479842615494,
"eval_recite_exact_match": 0.11666666666666667,
"step": 1767
},
{
"epoch": 19.2,
"learning_rate": 0.0001,
"loss": 0.0622,
"step": 1786
},
{
"epoch": 19.41,
"learning_rate": 0.0001,
"loss": 0.0684,
"step": 1805
},
{
"epoch": 19.61,
"learning_rate": 0.0001,
"loss": 0.0721,
"step": 1824
},
{
"epoch": 19.82,
"learning_rate": 0.0001,
"loss": 0.0715,
"step": 1843
},
{
"epoch": 20.0,
"eval_accuracy": 0.8390196078431372,
"eval_loss": 0.39820176362991333,
"eval_runtime": 8.1892,
"eval_samples_per_second": 36.634,
"eval_steps_per_second": 2.32,
"step": 1860
},
{
"epoch": 20.0,
"eval_exact_match": 11.333333333333334,
"eval_f1": 14.327777777777778,
"eval_qa_bleu": 10.816198494730774,
"eval_qa_exact_match": 0.08,
"eval_recite_bleu": 26.73749978175143,
"eval_recite_exact_match": 0.1,
"step": 1860
},
{
"epoch": 20.0,
"step": 1860,
"total_flos": 4.95263869850112e+16,
"train_loss": 0.24169133318367825,
"train_runtime": 4163.8318,
"train_samples_per_second": 7.123,
"train_steps_per_second": 0.447
}
],
"logging_steps": 19,
"max_steps": 1860,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 4.95263869850112e+16,
"trial_name": null,
"trial_params": null
}