tyzhu's picture
End of training
a8ed472 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 2110,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"learning_rate": 6.226415094339623e-06,
"loss": 1.1807,
"step": 22
},
{
"epoch": 0.21,
"learning_rate": 1.2452830188679246e-05,
"loss": 1.1325,
"step": 44
},
{
"epoch": 0.31,
"learning_rate": 1.8679245283018867e-05,
"loss": 0.9876,
"step": 66
},
{
"epoch": 0.42,
"learning_rate": 2.4905660377358492e-05,
"loss": 0.873,
"step": 88
},
{
"epoch": 0.52,
"learning_rate": 2.994011976047904e-05,
"loss": 0.7931,
"step": 110
},
{
"epoch": 0.63,
"learning_rate": 2.9610778443113774e-05,
"loss": 0.7764,
"step": 132
},
{
"epoch": 0.73,
"learning_rate": 2.9281437125748504e-05,
"loss": 0.7701,
"step": 154
},
{
"epoch": 0.83,
"learning_rate": 2.8952095808383233e-05,
"loss": 0.739,
"step": 176
},
{
"epoch": 0.94,
"learning_rate": 2.8622754491017966e-05,
"loss": 0.7644,
"step": 198
},
{
"epoch": 1.0,
"eval_accuracy": 0.6429210134128167,
"eval_loss": 1.0036486387252808,
"eval_runtime": 6.1897,
"eval_samples_per_second": 32.312,
"eval_steps_per_second": 2.1,
"step": 211
},
{
"epoch": 1.0,
"eval_exact_match": 14.5,
"eval_f1": 19.727380952380955,
"eval_qa_bleu": 9.309381343075087,
"eval_qa_exact_match": 0.14,
"eval_recite_bleu": 15.818078125717689,
"eval_recite_exact_match": 0.0,
"step": 211
},
{
"epoch": 1.04,
"learning_rate": 2.8293413173652696e-05,
"loss": 0.6488,
"step": 220
},
{
"epoch": 1.15,
"learning_rate": 2.7964071856287425e-05,
"loss": 0.5286,
"step": 242
},
{
"epoch": 1.25,
"learning_rate": 2.763473053892216e-05,
"loss": 0.5157,
"step": 264
},
{
"epoch": 1.36,
"learning_rate": 2.7305389221556884e-05,
"loss": 0.4838,
"step": 286
},
{
"epoch": 1.46,
"learning_rate": 2.6976047904191617e-05,
"loss": 0.5013,
"step": 308
},
{
"epoch": 1.56,
"learning_rate": 2.664670658682635e-05,
"loss": 0.4649,
"step": 330
},
{
"epoch": 1.67,
"learning_rate": 2.6317365269461076e-05,
"loss": 0.4604,
"step": 352
},
{
"epoch": 1.77,
"learning_rate": 2.598802395209581e-05,
"loss": 0.5064,
"step": 374
},
{
"epoch": 1.88,
"learning_rate": 2.565868263473054e-05,
"loss": 0.4732,
"step": 396
},
{
"epoch": 1.98,
"learning_rate": 2.5329341317365268e-05,
"loss": 0.526,
"step": 418
},
{
"epoch": 2.0,
"eval_accuracy": 0.6657600596125186,
"eval_loss": 0.7350580096244812,
"eval_runtime": 6.5425,
"eval_samples_per_second": 30.569,
"eval_steps_per_second": 1.987,
"step": 422
},
{
"epoch": 2.0,
"eval_exact_match": 12.5,
"eval_f1": 19.69947691197691,
"eval_qa_bleu": 9.452490656702446,
"eval_qa_exact_match": 0.12,
"eval_recite_bleu": 17.493375608153375,
"eval_recite_exact_match": 0.0,
"step": 422
},
{
"epoch": 2.09,
"learning_rate": 2.5e-05,
"loss": 0.4741,
"step": 440
},
{
"epoch": 2.19,
"learning_rate": 2.467065868263473e-05,
"loss": 0.4338,
"step": 462
},
{
"epoch": 2.29,
"learning_rate": 2.4341317365269464e-05,
"loss": 0.4451,
"step": 484
},
{
"epoch": 2.4,
"learning_rate": 2.4011976047904193e-05,
"loss": 0.4312,
"step": 506
},
{
"epoch": 2.5,
"learning_rate": 2.3682634730538923e-05,
"loss": 0.4363,
"step": 528
},
{
"epoch": 2.61,
"learning_rate": 2.3353293413173656e-05,
"loss": 0.4302,
"step": 550
},
{
"epoch": 2.71,
"learning_rate": 2.302395209580838e-05,
"loss": 0.4528,
"step": 572
},
{
"epoch": 2.82,
"learning_rate": 2.2694610778443115e-05,
"loss": 0.4086,
"step": 594
},
{
"epoch": 2.92,
"learning_rate": 2.2365269461077847e-05,
"loss": 0.4163,
"step": 616
},
{
"epoch": 3.0,
"eval_accuracy": 0.6815499254843517,
"eval_loss": 0.5743635296821594,
"eval_runtime": 6.2699,
"eval_samples_per_second": 31.898,
"eval_steps_per_second": 2.073,
"step": 633
},
{
"epoch": 3.0,
"eval_exact_match": 13.0,
"eval_f1": 20.81829004329004,
"eval_qa_bleu": 7.245823931129993,
"eval_qa_exact_match": 0.12,
"eval_recite_bleu": 22.248138618982026,
"eval_recite_exact_match": 0.0,
"step": 633
},
{
"epoch": 3.02,
"learning_rate": 2.2035928143712574e-05,
"loss": 0.3866,
"step": 638
},
{
"epoch": 3.13,
"learning_rate": 2.1706586826347306e-05,
"loss": 0.2789,
"step": 660
},
{
"epoch": 3.23,
"learning_rate": 2.1377245508982036e-05,
"loss": 0.2932,
"step": 682
},
{
"epoch": 3.34,
"learning_rate": 2.1047904191616766e-05,
"loss": 0.2883,
"step": 704
},
{
"epoch": 3.44,
"learning_rate": 2.07185628742515e-05,
"loss": 0.3048,
"step": 726
},
{
"epoch": 3.55,
"learning_rate": 2.0389221556886228e-05,
"loss": 0.28,
"step": 748
},
{
"epoch": 3.65,
"learning_rate": 2.0059880239520957e-05,
"loss": 0.2847,
"step": 770
},
{
"epoch": 3.75,
"learning_rate": 1.9730538922155687e-05,
"loss": 0.2949,
"step": 792
},
{
"epoch": 3.86,
"learning_rate": 1.940119760479042e-05,
"loss": 0.3002,
"step": 814
},
{
"epoch": 3.96,
"learning_rate": 1.9071856287425153e-05,
"loss": 0.2864,
"step": 836
},
{
"epoch": 4.0,
"eval_accuracy": 0.689903129657228,
"eval_loss": 0.4952711760997772,
"eval_runtime": 6.337,
"eval_samples_per_second": 31.561,
"eval_steps_per_second": 2.051,
"step": 844
},
{
"epoch": 4.0,
"eval_exact_match": 17.0,
"eval_f1": 22.154761904761905,
"eval_qa_bleu": 10.09488722739415,
"eval_qa_exact_match": 0.155,
"eval_recite_bleu": 27.91257178143564,
"eval_recite_exact_match": 0.005,
"step": 844
},
{
"epoch": 4.07,
"learning_rate": 1.874251497005988e-05,
"loss": 0.2368,
"step": 858
},
{
"epoch": 4.17,
"learning_rate": 1.8413173652694612e-05,
"loss": 0.203,
"step": 880
},
{
"epoch": 4.27,
"learning_rate": 1.8083832335329345e-05,
"loss": 0.2092,
"step": 902
},
{
"epoch": 4.38,
"learning_rate": 1.775449101796407e-05,
"loss": 0.2188,
"step": 924
},
{
"epoch": 4.48,
"learning_rate": 1.7425149700598804e-05,
"loss": 0.2092,
"step": 946
},
{
"epoch": 4.59,
"learning_rate": 1.7095808383233533e-05,
"loss": 0.2144,
"step": 968
},
{
"epoch": 4.69,
"learning_rate": 1.6766467065868263e-05,
"loss": 0.2061,
"step": 990
},
{
"epoch": 4.8,
"learning_rate": 1.6437125748502996e-05,
"loss": 0.2161,
"step": 1012
},
{
"epoch": 4.9,
"learning_rate": 1.6107784431137725e-05,
"loss": 0.2118,
"step": 1034
},
{
"epoch": 5.0,
"eval_accuracy": 0.6943889716840537,
"eval_loss": 0.45938587188720703,
"eval_runtime": 6.2541,
"eval_samples_per_second": 31.979,
"eval_steps_per_second": 2.079,
"step": 1055
},
{
"epoch": 5.0,
"eval_exact_match": 16.0,
"eval_f1": 22.22857142857142,
"eval_qa_bleu": 10.69246525438524,
"eval_qa_exact_match": 0.14,
"eval_recite_bleu": 33.64683237936858,
"eval_recite_exact_match": 0.025,
"step": 1055
},
{
"epoch": 5.0,
"learning_rate": 1.5778443113772455e-05,
"loss": 0.2129,
"step": 1056
},
{
"epoch": 5.11,
"learning_rate": 1.5449101796407184e-05,
"loss": 0.1559,
"step": 1078
},
{
"epoch": 5.21,
"learning_rate": 1.5119760479041917e-05,
"loss": 0.1631,
"step": 1100
},
{
"epoch": 5.32,
"learning_rate": 1.4790419161676647e-05,
"loss": 0.1614,
"step": 1122
},
{
"epoch": 5.42,
"learning_rate": 1.4461077844311378e-05,
"loss": 0.1636,
"step": 1144
},
{
"epoch": 5.53,
"learning_rate": 1.4131736526946109e-05,
"loss": 0.1645,
"step": 1166
},
{
"epoch": 5.63,
"learning_rate": 1.3802395209580839e-05,
"loss": 0.1613,
"step": 1188
},
{
"epoch": 5.73,
"learning_rate": 1.3473053892215568e-05,
"loss": 0.1724,
"step": 1210
},
{
"epoch": 5.84,
"learning_rate": 1.31437125748503e-05,
"loss": 0.1643,
"step": 1232
},
{
"epoch": 5.94,
"learning_rate": 1.281437125748503e-05,
"loss": 0.17,
"step": 1254
},
{
"epoch": 6.0,
"eval_accuracy": 0.6964456035767511,
"eval_loss": 0.44896385073661804,
"eval_runtime": 6.3718,
"eval_samples_per_second": 31.389,
"eval_steps_per_second": 2.04,
"step": 1266
},
{
"epoch": 6.0,
"eval_exact_match": 15.0,
"eval_f1": 21.070238095238086,
"eval_qa_bleu": 11.294234950233255,
"eval_qa_exact_match": 0.13,
"eval_recite_bleu": 35.4728355349377,
"eval_recite_exact_match": 0.025,
"step": 1266
},
{
"epoch": 6.05,
"learning_rate": 1.2485029940119762e-05,
"loss": 0.1441,
"step": 1276
},
{
"epoch": 6.15,
"learning_rate": 1.2155688622754491e-05,
"loss": 0.1356,
"step": 1298
},
{
"epoch": 6.26,
"learning_rate": 1.182634730538922e-05,
"loss": 0.1363,
"step": 1320
},
{
"epoch": 6.36,
"learning_rate": 1.1497005988023954e-05,
"loss": 0.137,
"step": 1342
},
{
"epoch": 6.46,
"learning_rate": 1.1167664670658683e-05,
"loss": 0.1382,
"step": 1364
},
{
"epoch": 6.57,
"learning_rate": 1.0838323353293413e-05,
"loss": 0.1416,
"step": 1386
},
{
"epoch": 6.67,
"learning_rate": 1.0508982035928144e-05,
"loss": 0.1389,
"step": 1408
},
{
"epoch": 6.78,
"learning_rate": 1.0179640718562873e-05,
"loss": 0.1392,
"step": 1430
},
{
"epoch": 6.88,
"learning_rate": 9.850299401197606e-06,
"loss": 0.1398,
"step": 1452
},
{
"epoch": 6.99,
"learning_rate": 9.520958083832336e-06,
"loss": 0.134,
"step": 1474
},
{
"epoch": 7.0,
"eval_accuracy": 0.697876304023845,
"eval_loss": 0.43685418367385864,
"eval_runtime": 6.2539,
"eval_samples_per_second": 31.98,
"eval_steps_per_second": 2.079,
"step": 1477
},
{
"epoch": 7.0,
"eval_exact_match": 16.5,
"eval_f1": 22.119444444444444,
"eval_qa_bleu": 13.989667861778496,
"eval_qa_exact_match": 0.14,
"eval_recite_bleu": 42.188868266934165,
"eval_recite_exact_match": 0.04,
"step": 1477
},
{
"epoch": 7.09,
"learning_rate": 9.191616766467065e-06,
"loss": 0.1188,
"step": 1496
},
{
"epoch": 7.19,
"learning_rate": 8.862275449101796e-06,
"loss": 0.1207,
"step": 1518
},
{
"epoch": 7.3,
"learning_rate": 8.532934131736528e-06,
"loss": 0.1223,
"step": 1540
},
{
"epoch": 7.4,
"learning_rate": 8.203592814371257e-06,
"loss": 0.115,
"step": 1562
},
{
"epoch": 7.51,
"learning_rate": 7.874251497005988e-06,
"loss": 0.1139,
"step": 1584
},
{
"epoch": 7.61,
"learning_rate": 7.544910179640718e-06,
"loss": 0.117,
"step": 1606
},
{
"epoch": 7.72,
"learning_rate": 7.215568862275449e-06,
"loss": 0.1203,
"step": 1628
},
{
"epoch": 7.82,
"learning_rate": 6.8862275449101795e-06,
"loss": 0.1229,
"step": 1650
},
{
"epoch": 7.92,
"learning_rate": 6.556886227544911e-06,
"loss": 0.1206,
"step": 1672
},
{
"epoch": 8.0,
"eval_accuracy": 0.6986959761549926,
"eval_loss": 0.43722403049468994,
"eval_runtime": 6.191,
"eval_samples_per_second": 32.305,
"eval_steps_per_second": 2.1,
"step": 1688
},
{
"epoch": 8.0,
"eval_exact_match": 20.0,
"eval_f1": 25.83571428571428,
"eval_qa_bleu": 8.333490045944334,
"eval_qa_exact_match": 0.18,
"eval_recite_bleu": 43.68492978075338,
"eval_recite_exact_match": 0.065,
"step": 1688
},
{
"epoch": 8.03,
"learning_rate": 6.22754491017964e-06,
"loss": 0.1145,
"step": 1694
},
{
"epoch": 8.13,
"learning_rate": 5.898203592814371e-06,
"loss": 0.1066,
"step": 1716
},
{
"epoch": 8.24,
"learning_rate": 5.568862275449102e-06,
"loss": 0.1064,
"step": 1738
},
{
"epoch": 8.34,
"learning_rate": 5.239520958083833e-06,
"loss": 0.1077,
"step": 1760
},
{
"epoch": 8.45,
"learning_rate": 4.9101796407185625e-06,
"loss": 0.1108,
"step": 1782
},
{
"epoch": 8.55,
"learning_rate": 4.580838323353294e-06,
"loss": 0.1078,
"step": 1804
},
{
"epoch": 8.65,
"learning_rate": 4.251497005988024e-06,
"loss": 0.1072,
"step": 1826
},
{
"epoch": 8.76,
"learning_rate": 3.922155688622755e-06,
"loss": 0.1098,
"step": 1848
},
{
"epoch": 8.86,
"learning_rate": 3.592814371257485e-06,
"loss": 0.1093,
"step": 1870
},
{
"epoch": 8.97,
"learning_rate": 3.2634730538922155e-06,
"loss": 0.1081,
"step": 1892
},
{
"epoch": 9.0,
"eval_accuracy": 0.698725782414307,
"eval_loss": 0.44227516651153564,
"eval_runtime": 6.2856,
"eval_samples_per_second": 31.819,
"eval_steps_per_second": 2.068,
"step": 1899
},
{
"epoch": 9.0,
"eval_exact_match": 17.5,
"eval_f1": 22.30357142857143,
"eval_qa_bleu": 10.817736383091892,
"eval_qa_exact_match": 0.15,
"eval_recite_bleu": 43.37666725316897,
"eval_recite_exact_match": 0.06,
"step": 1899
},
{
"epoch": 9.07,
"learning_rate": 2.9341317365269463e-06,
"loss": 0.1079,
"step": 1914
},
{
"epoch": 9.18,
"learning_rate": 2.6047904191616767e-06,
"loss": 0.102,
"step": 1936
},
{
"epoch": 9.28,
"learning_rate": 2.2754491017964075e-06,
"loss": 0.1003,
"step": 1958
},
{
"epoch": 9.38,
"learning_rate": 1.9461077844311374e-06,
"loss": 0.1023,
"step": 1980
},
{
"epoch": 9.49,
"learning_rate": 1.6167664670658684e-06,
"loss": 0.1003,
"step": 2002
},
{
"epoch": 9.59,
"learning_rate": 1.287425149700599e-06,
"loss": 0.1058,
"step": 2024
},
{
"epoch": 9.7,
"learning_rate": 9.580838323353293e-07,
"loss": 0.1067,
"step": 2046
},
{
"epoch": 9.8,
"learning_rate": 6.287425149700599e-07,
"loss": 0.1047,
"step": 2068
},
{
"epoch": 9.91,
"learning_rate": 2.994011976047904e-07,
"loss": 0.1053,
"step": 2090
},
{
"epoch": 10.0,
"eval_accuracy": 0.6988971684053651,
"eval_loss": 0.4435840845108032,
"eval_runtime": 6.3653,
"eval_samples_per_second": 31.42,
"eval_steps_per_second": 2.042,
"step": 2110
},
{
"epoch": 10.0,
"eval_exact_match": 20.5,
"eval_f1": 25.95833333333333,
"eval_qa_bleu": 12.082719988904218,
"eval_qa_exact_match": 0.18,
"eval_recite_bleu": 44.74289478974747,
"eval_recite_exact_match": 0.075,
"step": 2110
},
{
"epoch": 10.0,
"step": 2110,
"total_flos": 9.64020042848256e+16,
"train_loss": 0.29445283661521443,
"train_runtime": 3197.9615,
"train_samples_per_second": 10.547,
"train_steps_per_second": 0.66
}
],
"logging_steps": 22,
"max_steps": 2110,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 9.64020042848256e+16,
"trial_name": null,
"trial_params": null
}