tyzhu's picture
End of training
65db12a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 66,
"global_step": 1310,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.11,
"learning_rate": 3e-05,
"loss": 3.7331,
"step": 14
},
{
"epoch": 0.21,
"learning_rate": 3e-05,
"loss": 2.345,
"step": 28
},
{
"epoch": 0.32,
"learning_rate": 3e-05,
"loss": 2.1491,
"step": 42
},
{
"epoch": 0.43,
"learning_rate": 3e-05,
"loss": 2.0739,
"step": 56
},
{
"epoch": 0.5,
"eval_accuracy": 0.6206453178068898,
"eval_loss": 1.8404711484909058,
"eval_runtime": 11.9883,
"eval_samples_per_second": 25.024,
"eval_steps_per_second": 1.585,
"step": 66
},
{
"epoch": 0.5,
"eval_exact_match": 7.666666666666667,
"eval_f1": 10.221428571428572,
"eval_qa_bleu": 1.4042262195131967,
"eval_qa_exact_match": 0.07333333333333333,
"eval_recite_bleu": 8.5956480576491,
"eval_recite_exact_match": 0.0,
"step": 66
},
{
"epoch": 0.53,
"learning_rate": 3e-05,
"loss": 1.9722,
"step": 70
},
{
"epoch": 0.64,
"learning_rate": 3e-05,
"loss": 2.014,
"step": 84
},
{
"epoch": 0.75,
"learning_rate": 3e-05,
"loss": 1.9812,
"step": 98
},
{
"epoch": 0.85,
"learning_rate": 3e-05,
"loss": 1.9213,
"step": 112
},
{
"epoch": 0.96,
"learning_rate": 3e-05,
"loss": 1.8806,
"step": 126
},
{
"epoch": 1.01,
"eval_accuracy": 0.6365065502183406,
"eval_loss": 1.574863314628601,
"eval_runtime": 9.1867,
"eval_samples_per_second": 32.656,
"eval_steps_per_second": 2.068,
"step": 132
},
{
"epoch": 1.01,
"eval_exact_match": 8.666666666666666,
"eval_f1": 13.8123783922171,
"eval_qa_bleu": 3.0725240037081307,
"eval_qa_exact_match": 0.08666666666666667,
"eval_recite_bleu": 16.386073781113847,
"eval_recite_exact_match": 0.0,
"step": 132
},
{
"epoch": 1.07,
"learning_rate": 3e-05,
"loss": 1.6177,
"step": 140
},
{
"epoch": 1.18,
"learning_rate": 3e-05,
"loss": 1.4286,
"step": 154
},
{
"epoch": 1.28,
"learning_rate": 3e-05,
"loss": 1.4102,
"step": 168
},
{
"epoch": 1.39,
"learning_rate": 3e-05,
"loss": 1.4033,
"step": 182
},
{
"epoch": 1.5,
"learning_rate": 3e-05,
"loss": 1.3619,
"step": 196
},
{
"epoch": 1.51,
"eval_accuracy": 0.6533139252789908,
"eval_loss": 1.342494010925293,
"eval_runtime": 9.3165,
"eval_samples_per_second": 32.201,
"eval_steps_per_second": 2.039,
"step": 198
},
{
"epoch": 1.51,
"eval_exact_match": 7.0,
"eval_f1": 10.38888888888889,
"eval_qa_bleu": 5.590724094958645,
"eval_qa_exact_match": 0.06666666666666667,
"eval_recite_bleu": 17.070052559724132,
"eval_recite_exact_match": 0.0,
"step": 198
},
{
"epoch": 1.6,
"learning_rate": 3e-05,
"loss": 1.3649,
"step": 210
},
{
"epoch": 1.71,
"learning_rate": 3e-05,
"loss": 1.2886,
"step": 224
},
{
"epoch": 1.82,
"learning_rate": 3e-05,
"loss": 1.2787,
"step": 238
},
{
"epoch": 1.92,
"learning_rate": 3e-05,
"loss": 1.283,
"step": 252
},
{
"epoch": 2.02,
"eval_accuracy": 0.6685152838427948,
"eval_loss": 1.1253471374511719,
"eval_runtime": 9.3443,
"eval_samples_per_second": 32.105,
"eval_steps_per_second": 2.033,
"step": 264
},
{
"epoch": 2.02,
"eval_exact_match": 9.0,
"eval_f1": 13.042195767195768,
"eval_qa_bleu": 5.743589453455261,
"eval_qa_exact_match": 0.08666666666666667,
"eval_recite_bleu": 15.4059437725803,
"eval_recite_exact_match": 0.0,
"step": 264
},
{
"epoch": 2.03,
"learning_rate": 3e-05,
"loss": 1.112,
"step": 266
},
{
"epoch": 2.14,
"learning_rate": 3e-05,
"loss": 0.8377,
"step": 280
},
{
"epoch": 2.24,
"learning_rate": 3e-05,
"loss": 0.8533,
"step": 294
},
{
"epoch": 2.35,
"learning_rate": 3e-05,
"loss": 0.8407,
"step": 308
},
{
"epoch": 2.46,
"learning_rate": 3e-05,
"loss": 0.8433,
"step": 322
},
{
"epoch": 2.52,
"eval_accuracy": 0.6825278990781174,
"eval_loss": 0.9735248684883118,
"eval_runtime": 9.3401,
"eval_samples_per_second": 32.12,
"eval_steps_per_second": 2.034,
"step": 330
},
{
"epoch": 2.52,
"eval_exact_match": 11.666666666666666,
"eval_f1": 15.95859788359788,
"eval_qa_bleu": 6.31601664322449,
"eval_qa_exact_match": 0.11,
"eval_recite_bleu": 21.285839887136575,
"eval_recite_exact_match": 0.0,
"step": 330
},
{
"epoch": 2.56,
"learning_rate": 3e-05,
"loss": 0.8632,
"step": 336
},
{
"epoch": 2.67,
"learning_rate": 3e-05,
"loss": 0.8308,
"step": 350
},
{
"epoch": 2.78,
"learning_rate": 3e-05,
"loss": 0.8475,
"step": 364
},
{
"epoch": 2.89,
"learning_rate": 3e-05,
"loss": 0.7829,
"step": 378
},
{
"epoch": 2.99,
"learning_rate": 3e-05,
"loss": 0.7629,
"step": 392
},
{
"epoch": 3.02,
"eval_accuracy": 0.6982241630276564,
"eval_loss": 0.7873561382293701,
"eval_runtime": 9.0856,
"eval_samples_per_second": 33.019,
"eval_steps_per_second": 2.091,
"step": 396
},
{
"epoch": 3.02,
"eval_exact_match": 12.333333333333334,
"eval_f1": 16.93478835978836,
"eval_qa_bleu": 7.7589360856745815,
"eval_qa_exact_match": 0.12,
"eval_recite_bleu": 22.16978951972916,
"eval_recite_exact_match": 0.0,
"step": 396
},
{
"epoch": 3.1,
"learning_rate": 3e-05,
"loss": 0.5537,
"step": 406
},
{
"epoch": 3.21,
"learning_rate": 3e-05,
"loss": 0.5111,
"step": 420
},
{
"epoch": 3.31,
"learning_rate": 3e-05,
"loss": 0.479,
"step": 434
},
{
"epoch": 3.42,
"learning_rate": 3e-05,
"loss": 0.4904,
"step": 448
},
{
"epoch": 3.53,
"learning_rate": 3e-05,
"loss": 0.5058,
"step": 462
},
{
"epoch": 3.53,
"eval_accuracy": 0.7086026200873362,
"eval_loss": 0.6920613646507263,
"eval_runtime": 9.3473,
"eval_samples_per_second": 32.095,
"eval_steps_per_second": 2.033,
"step": 462
},
{
"epoch": 3.53,
"eval_exact_match": 15.333333333333334,
"eval_f1": 20.8812384782973,
"eval_qa_bleu": 11.001436952878551,
"eval_qa_exact_match": 0.14,
"eval_recite_bleu": 25.319328021621963,
"eval_recite_exact_match": 0.0,
"step": 462
},
{
"epoch": 3.63,
"learning_rate": 3e-05,
"loss": 0.5456,
"step": 476
},
{
"epoch": 3.74,
"learning_rate": 3e-05,
"loss": 0.4998,
"step": 490
},
{
"epoch": 3.85,
"learning_rate": 3e-05,
"loss": 0.5075,
"step": 504
},
{
"epoch": 3.95,
"learning_rate": 3e-05,
"loss": 0.4593,
"step": 518
},
{
"epoch": 4.03,
"eval_accuracy": 0.7196943231441048,
"eval_loss": 0.564062237739563,
"eval_runtime": 9.2784,
"eval_samples_per_second": 32.333,
"eval_steps_per_second": 2.048,
"step": 528
},
{
"epoch": 4.03,
"eval_exact_match": 13.333333333333334,
"eval_f1": 18.11966301672184,
"eval_qa_bleu": 4.47266646700781,
"eval_qa_exact_match": 0.12666666666666668,
"eval_recite_bleu": 26.136161725029392,
"eval_recite_exact_match": 0.0033333333333333335,
"step": 528
},
{
"epoch": 4.06,
"learning_rate": 3e-05,
"loss": 0.3952,
"step": 532
},
{
"epoch": 4.17,
"learning_rate": 3e-05,
"loss": 0.2996,
"step": 546
},
{
"epoch": 4.27,
"learning_rate": 3e-05,
"loss": 0.3354,
"step": 560
},
{
"epoch": 4.38,
"learning_rate": 3e-05,
"loss": 0.3179,
"step": 574
},
{
"epoch": 4.49,
"learning_rate": 3e-05,
"loss": 0.3064,
"step": 588
},
{
"epoch": 4.53,
"eval_accuracy": 0.7245269286754003,
"eval_loss": 0.5348330736160278,
"eval_runtime": 9.5519,
"eval_samples_per_second": 31.407,
"eval_steps_per_second": 1.989,
"step": 594
},
{
"epoch": 4.53,
"eval_exact_match": 13.666666666666666,
"eval_f1": 19.689682539682536,
"eval_qa_bleu": 4.45260487005976,
"eval_qa_exact_match": 0.12666666666666668,
"eval_recite_bleu": 28.855187002245795,
"eval_recite_exact_match": 0.0,
"step": 594
},
{
"epoch": 4.6,
"learning_rate": 3e-05,
"loss": 0.3258,
"step": 602
},
{
"epoch": 4.7,
"learning_rate": 3e-05,
"loss": 0.3384,
"step": 616
},
{
"epoch": 4.81,
"learning_rate": 3e-05,
"loss": 0.3258,
"step": 630
},
{
"epoch": 4.92,
"learning_rate": 3e-05,
"loss": 0.312,
"step": 644
},
{
"epoch": 5.02,
"learning_rate": 3e-05,
"loss": 0.2967,
"step": 658
},
{
"epoch": 5.04,
"eval_accuracy": 0.7303784570596797,
"eval_loss": 0.47703343629837036,
"eval_runtime": 9.4774,
"eval_samples_per_second": 31.654,
"eval_steps_per_second": 2.005,
"step": 660
},
{
"epoch": 5.04,
"eval_exact_match": 12.333333333333334,
"eval_f1": 18.61798941798941,
"eval_qa_bleu": 6.60839076276961,
"eval_qa_exact_match": 0.12,
"eval_recite_bleu": 29.5293953590396,
"eval_recite_exact_match": 0.0,
"step": 660
},
{
"epoch": 5.13,
"learning_rate": 3e-05,
"loss": 0.2148,
"step": 672
},
{
"epoch": 5.24,
"learning_rate": 3e-05,
"loss": 0.2275,
"step": 686
},
{
"epoch": 5.34,
"learning_rate": 3e-05,
"loss": 0.2158,
"step": 700
},
{
"epoch": 5.45,
"learning_rate": 3e-05,
"loss": 0.2167,
"step": 714
},
{
"epoch": 5.54,
"eval_accuracy": 0.732372634643377,
"eval_loss": 0.458192378282547,
"eval_runtime": 9.1255,
"eval_samples_per_second": 32.875,
"eval_steps_per_second": 2.082,
"step": 726
},
{
"epoch": 5.54,
"eval_exact_match": 14.333333333333334,
"eval_f1": 19.81243386243386,
"eval_qa_bleu": 8.961623752889384,
"eval_qa_exact_match": 0.13666666666666666,
"eval_recite_bleu": 33.41904095334099,
"eval_recite_exact_match": 0.0,
"step": 726
},
{
"epoch": 5.56,
"learning_rate": 3e-05,
"loss": 0.229,
"step": 728
},
{
"epoch": 5.66,
"learning_rate": 3e-05,
"loss": 0.2275,
"step": 742
},
{
"epoch": 5.77,
"learning_rate": 3e-05,
"loss": 0.2211,
"step": 756
},
{
"epoch": 5.88,
"learning_rate": 3e-05,
"loss": 0.2231,
"step": 770
},
{
"epoch": 5.98,
"learning_rate": 3e-05,
"loss": 0.2157,
"step": 784
},
{
"epoch": 6.05,
"eval_accuracy": 0.7358369723435225,
"eval_loss": 0.4307834804058075,
"eval_runtime": 9.1847,
"eval_samples_per_second": 32.663,
"eval_steps_per_second": 2.069,
"step": 792
},
{
"epoch": 6.05,
"eval_exact_match": 16.666666666666668,
"eval_f1": 21.786772486772488,
"eval_qa_bleu": 7.511563755726586,
"eval_qa_exact_match": 0.16,
"eval_recite_bleu": 37.63384023220464,
"eval_recite_exact_match": 0.013333333333333334,
"step": 792
},
{
"epoch": 6.09,
"learning_rate": 3e-05,
"loss": 0.1669,
"step": 798
},
{
"epoch": 6.2,
"learning_rate": 3e-05,
"loss": 0.1712,
"step": 812
},
{
"epoch": 6.31,
"learning_rate": 3e-05,
"loss": 0.1601,
"step": 826
},
{
"epoch": 6.41,
"learning_rate": 3e-05,
"loss": 0.1608,
"step": 840
},
{
"epoch": 6.52,
"learning_rate": 3e-05,
"loss": 0.1597,
"step": 854
},
{
"epoch": 6.55,
"eval_accuracy": 0.7373410965550704,
"eval_loss": 0.4301389157772064,
"eval_runtime": 9.7372,
"eval_samples_per_second": 30.81,
"eval_steps_per_second": 1.951,
"step": 858
},
{
"epoch": 6.55,
"eval_exact_match": 15.666666666666666,
"eval_f1": 21.056661856661847,
"eval_qa_bleu": 12.649140852831426,
"eval_qa_exact_match": 0.14666666666666667,
"eval_recite_bleu": 42.22665248887737,
"eval_recite_exact_match": 0.013333333333333334,
"step": 858
},
{
"epoch": 6.63,
"learning_rate": 3e-05,
"loss": 0.1623,
"step": 868
},
{
"epoch": 6.73,
"learning_rate": 3e-05,
"loss": 0.1668,
"step": 882
},
{
"epoch": 6.84,
"learning_rate": 3e-05,
"loss": 0.1624,
"step": 896
},
{
"epoch": 6.95,
"learning_rate": 3e-05,
"loss": 0.1648,
"step": 910
},
{
"epoch": 7.05,
"learning_rate": 3e-05,
"loss": 0.1481,
"step": 924
},
{
"epoch": 7.05,
"eval_accuracy": 0.7385298398835517,
"eval_loss": 0.42236796021461487,
"eval_runtime": 9.3603,
"eval_samples_per_second": 32.05,
"eval_steps_per_second": 2.03,
"step": 924
},
{
"epoch": 7.05,
"eval_exact_match": 18.666666666666668,
"eval_f1": 25.187830687830694,
"eval_qa_bleu": 8.014835952265651,
"eval_qa_exact_match": 0.18,
"eval_recite_bleu": 42.38987173856079,
"eval_recite_exact_match": 0.016666666666666666,
"step": 924
},
{
"epoch": 7.16,
"learning_rate": 3e-05,
"loss": 0.1227,
"step": 938
},
{
"epoch": 7.27,
"learning_rate": 3e-05,
"loss": 0.1272,
"step": 952
},
{
"epoch": 7.37,
"learning_rate": 3e-05,
"loss": 0.1312,
"step": 966
},
{
"epoch": 7.48,
"learning_rate": 3e-05,
"loss": 0.1293,
"step": 980
},
{
"epoch": 7.56,
"eval_accuracy": 0.739422610383309,
"eval_loss": 0.41248488426208496,
"eval_runtime": 9.7486,
"eval_samples_per_second": 30.774,
"eval_steps_per_second": 1.949,
"step": 990
},
{
"epoch": 7.56,
"eval_exact_match": 15.666666666666666,
"eval_f1": 22.348196248196246,
"eval_qa_bleu": 6.260683683577316,
"eval_qa_exact_match": 0.14666666666666667,
"eval_recite_bleu": 45.04221504063147,
"eval_recite_exact_match": 0.023333333333333334,
"step": 990
},
{
"epoch": 7.59,
"learning_rate": 3e-05,
"loss": 0.1345,
"step": 994
},
{
"epoch": 7.69,
"learning_rate": 3e-05,
"loss": 0.1325,
"step": 1008
},
{
"epoch": 7.8,
"learning_rate": 3e-05,
"loss": 0.1273,
"step": 1022
},
{
"epoch": 7.91,
"learning_rate": 3e-05,
"loss": 0.1362,
"step": 1036
},
{
"epoch": 8.02,
"learning_rate": 3e-05,
"loss": 0.125,
"step": 1050
},
{
"epoch": 8.06,
"eval_accuracy": 0.7399902959728287,
"eval_loss": 0.41223272681236267,
"eval_runtime": 9.2562,
"eval_samples_per_second": 32.411,
"eval_steps_per_second": 2.053,
"step": 1056
},
{
"epoch": 8.06,
"eval_exact_match": 18.0,
"eval_f1": 25.051058201058197,
"eval_qa_bleu": 10.352483602423003,
"eval_qa_exact_match": 0.17,
"eval_recite_bleu": 46.26228887496748,
"eval_recite_exact_match": 0.04666666666666667,
"step": 1056
},
{
"epoch": 8.12,
"learning_rate": 3e-05,
"loss": 0.11,
"step": 1064
},
{
"epoch": 8.23,
"learning_rate": 3e-05,
"loss": 0.1045,
"step": 1078
},
{
"epoch": 8.34,
"learning_rate": 3e-05,
"loss": 0.1179,
"step": 1092
},
{
"epoch": 8.44,
"learning_rate": 3e-05,
"loss": 0.1083,
"step": 1106
},
{
"epoch": 8.55,
"learning_rate": 3e-05,
"loss": 0.1139,
"step": 1120
},
{
"epoch": 8.56,
"eval_accuracy": 0.7406501698204755,
"eval_loss": 0.40691348910331726,
"eval_runtime": 9.0014,
"eval_samples_per_second": 33.328,
"eval_steps_per_second": 2.111,
"step": 1122
},
{
"epoch": 8.56,
"eval_exact_match": 16.333333333333332,
"eval_f1": 23.766955266955264,
"eval_qa_bleu": 11.966740392922118,
"eval_qa_exact_match": 0.15333333333333332,
"eval_recite_bleu": 45.74271226096357,
"eval_recite_exact_match": 0.03333333333333333,
"step": 1122
},
{
"epoch": 8.66,
"learning_rate": 3e-05,
"loss": 0.1108,
"step": 1134
},
{
"epoch": 8.76,
"learning_rate": 3e-05,
"loss": 0.1168,
"step": 1148
},
{
"epoch": 8.87,
"learning_rate": 3e-05,
"loss": 0.1132,
"step": 1162
},
{
"epoch": 8.98,
"learning_rate": 3e-05,
"loss": 0.1141,
"step": 1176
},
{
"epoch": 9.07,
"eval_accuracy": 0.7409364386220282,
"eval_loss": 0.40822312235832214,
"eval_runtime": 9.119,
"eval_samples_per_second": 32.898,
"eval_steps_per_second": 2.084,
"step": 1188
},
{
"epoch": 9.07,
"eval_exact_match": 17.333333333333332,
"eval_f1": 23.91428571428571,
"eval_qa_bleu": 11.956602534707741,
"eval_qa_exact_match": 0.16666666666666666,
"eval_recite_bleu": 49.23892010841006,
"eval_recite_exact_match": 0.056666666666666664,
"step": 1188
},
{
"epoch": 9.08,
"learning_rate": 3e-05,
"loss": 0.0925,
"step": 1190
},
{
"epoch": 9.19,
"learning_rate": 3e-05,
"loss": 0.0918,
"step": 1204
},
{
"epoch": 9.3,
"learning_rate": 3e-05,
"loss": 0.0975,
"step": 1218
},
{
"epoch": 9.4,
"learning_rate": 3e-05,
"loss": 0.0998,
"step": 1232
},
{
"epoch": 9.51,
"learning_rate": 3e-05,
"loss": 0.0994,
"step": 1246
},
{
"epoch": 9.57,
"eval_accuracy": 0.7412130033964095,
"eval_loss": 0.40646126866340637,
"eval_runtime": 9.2467,
"eval_samples_per_second": 32.444,
"eval_steps_per_second": 2.055,
"step": 1254
},
{
"epoch": 9.57,
"eval_exact_match": 18.0,
"eval_f1": 25.15901875901876,
"eval_qa_bleu": 9.885421847181547,
"eval_qa_exact_match": 0.16666666666666666,
"eval_recite_bleu": 48.32004544781462,
"eval_recite_exact_match": 0.06,
"step": 1254
},
{
"epoch": 9.62,
"learning_rate": 3e-05,
"loss": 0.0972,
"step": 1260
},
{
"epoch": 9.73,
"learning_rate": 3e-05,
"loss": 0.1074,
"step": 1274
},
{
"epoch": 9.83,
"learning_rate": 3e-05,
"loss": 0.0998,
"step": 1288
},
{
"epoch": 9.94,
"learning_rate": 3e-05,
"loss": 0.1094,
"step": 1302
},
{
"epoch": 10.0,
"step": 1310,
"total_flos": 5.95362899625984e+16,
"train_loss": 0.5945830505312855,
"train_runtime": 4268.7073,
"train_samples_per_second": 4.905,
"train_steps_per_second": 0.307
}
],
"logging_steps": 14,
"max_steps": 1310,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 5.95362899625984e+16,
"trial_name": null,
"trial_params": null
}