tyzhu's picture
End of training
c1f7f62 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"eval_steps": 1452,
"global_step": 29040,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 3e-05,
"loss": 2.2721,
"step": 291
},
{
"epoch": 0.4,
"learning_rate": 3e-05,
"loss": 2.0539,
"step": 582
},
{
"epoch": 0.6,
"learning_rate": 3e-05,
"loss": 2.0079,
"step": 873
},
{
"epoch": 0.8,
"learning_rate": 3e-05,
"loss": 1.9686,
"step": 1164
},
{
"epoch": 1.0,
"eval_accuracy": 0.668149394347241,
"eval_loss": 1.5662202835083008,
"eval_runtime": 137.7572,
"eval_samples_per_second": 36.296,
"eval_steps_per_second": 2.272,
"step": 1452
},
{
"epoch": 1.0,
"eval_exact_match": 9.04,
"eval_f1": 13.087046780017364,
"eval_qa_bleu": 5.297766450359973,
"eval_qa_exact_match": 0.085,
"eval_recite_bleu": 14.716037077738477,
"eval_recite_exact_match": 0.0,
"step": 1452
},
{
"epoch": 1.0,
"learning_rate": 3e-05,
"loss": 1.9076,
"step": 1455
},
{
"epoch": 1.2,
"learning_rate": 3e-05,
"loss": 1.5211,
"step": 1746
},
{
"epoch": 1.4,
"learning_rate": 3e-05,
"loss": 1.5039,
"step": 2037
},
{
"epoch": 1.6,
"learning_rate": 3e-05,
"loss": 1.4834,
"step": 2328
},
{
"epoch": 1.8,
"learning_rate": 3e-05,
"loss": 1.4604,
"step": 2619
},
{
"epoch": 2.0,
"eval_accuracy": 0.6914514131897712,
"eval_loss": 1.206884741783142,
"eval_runtime": 139.7305,
"eval_samples_per_second": 35.783,
"eval_steps_per_second": 2.24,
"step": 2904
},
{
"epoch": 2.0,
"eval_exact_match": 11.64,
"eval_f1": 16.917593370997537,
"eval_qa_bleu": 7.627957176996819,
"eval_qa_exact_match": 0.1106,
"eval_recite_bleu": 18.356743056908567,
"eval_recite_exact_match": 0.0,
"step": 2904
},
{
"epoch": 2.0,
"learning_rate": 3e-05,
"loss": 1.4368,
"step": 2910
},
{
"epoch": 2.2,
"learning_rate": 3e-05,
"loss": 1.0705,
"step": 3201
},
{
"epoch": 2.4,
"learning_rate": 3e-05,
"loss": 1.0685,
"step": 3492
},
{
"epoch": 2.61,
"learning_rate": 3e-05,
"loss": 1.0554,
"step": 3783
},
{
"epoch": 2.81,
"learning_rate": 3e-05,
"loss": 1.0522,
"step": 4074
},
{
"epoch": 3.0,
"eval_accuracy": 0.7148896366083446,
"eval_loss": 0.9045500159263611,
"eval_runtime": 141.6455,
"eval_samples_per_second": 35.299,
"eval_steps_per_second": 2.21,
"step": 4356
},
{
"epoch": 3.0,
"eval_exact_match": 12.84,
"eval_f1": 18.555356874635468,
"eval_qa_bleu": 8.395879921702353,
"eval_qa_exact_match": 0.1188,
"eval_recite_bleu": 20.824221134285214,
"eval_recite_exact_match": 0.0,
"step": 4356
},
{
"epoch": 3.01,
"learning_rate": 3e-05,
"loss": 1.0375,
"step": 4365
},
{
"epoch": 3.21,
"learning_rate": 3e-05,
"loss": 0.7373,
"step": 4656
},
{
"epoch": 3.41,
"learning_rate": 3e-05,
"loss": 0.7497,
"step": 4947
},
{
"epoch": 3.61,
"learning_rate": 3e-05,
"loss": 0.7479,
"step": 5238
},
{
"epoch": 3.81,
"learning_rate": 3e-05,
"loss": 0.7455,
"step": 5529
},
{
"epoch": 4.0,
"eval_accuracy": 0.7340508748317631,
"eval_loss": 0.6843858957290649,
"eval_runtime": 143.5169,
"eval_samples_per_second": 34.839,
"eval_steps_per_second": 2.181,
"step": 5808
},
{
"epoch": 4.0,
"eval_exact_match": 13.44,
"eval_f1": 19.418847592695993,
"eval_qa_bleu": 8.731752840499793,
"eval_qa_exact_match": 0.1256,
"eval_recite_bleu": 24.11683390270685,
"eval_recite_exact_match": 0.0002,
"step": 5808
},
{
"epoch": 4.01,
"learning_rate": 3e-05,
"loss": 0.7226,
"step": 5820
},
{
"epoch": 4.21,
"learning_rate": 3e-05,
"loss": 0.5117,
"step": 6111
},
{
"epoch": 4.41,
"learning_rate": 3e-05,
"loss": 0.5208,
"step": 6402
},
{
"epoch": 4.61,
"learning_rate": 3e-05,
"loss": 0.5271,
"step": 6693
},
{
"epoch": 4.81,
"learning_rate": 3e-05,
"loss": 0.5307,
"step": 6984
},
{
"epoch": 5.0,
"eval_accuracy": 0.7474745625841185,
"eval_loss": 0.5420497059822083,
"eval_runtime": 144.0475,
"eval_samples_per_second": 34.711,
"eval_steps_per_second": 2.173,
"step": 7260
},
{
"epoch": 5.0,
"eval_exact_match": 15.08,
"eval_f1": 21.4491845879152,
"eval_qa_bleu": 10.89471390420015,
"eval_qa_exact_match": 0.1434,
"eval_recite_bleu": 27.62075386163555,
"eval_recite_exact_match": 0.0014,
"step": 7260
},
{
"epoch": 5.01,
"learning_rate": 3e-05,
"loss": 0.5165,
"step": 7275
},
{
"epoch": 5.21,
"learning_rate": 3e-05,
"loss": 0.3624,
"step": 7566
},
{
"epoch": 5.41,
"learning_rate": 3e-05,
"loss": 0.3748,
"step": 7857
},
{
"epoch": 5.61,
"learning_rate": 3e-05,
"loss": 0.383,
"step": 8148
},
{
"epoch": 5.81,
"learning_rate": 3e-05,
"loss": 0.3796,
"step": 8439
},
{
"epoch": 6.0,
"eval_accuracy": 0.7560010767160161,
"eval_loss": 0.4608670473098755,
"eval_runtime": 144.2314,
"eval_samples_per_second": 34.667,
"eval_steps_per_second": 2.17,
"step": 8712
},
{
"epoch": 6.0,
"eval_exact_match": 16.22,
"eval_f1": 22.5212141898349,
"eval_qa_bleu": 11.517856911949949,
"eval_qa_exact_match": 0.1524,
"eval_recite_bleu": 32.1802083615024,
"eval_recite_exact_match": 0.0044,
"step": 8712
},
{
"epoch": 6.01,
"learning_rate": 3e-05,
"loss": 0.3803,
"step": 8730
},
{
"epoch": 6.21,
"learning_rate": 3e-05,
"loss": 0.27,
"step": 9021
},
{
"epoch": 6.41,
"learning_rate": 3e-05,
"loss": 0.28,
"step": 9312
},
{
"epoch": 6.61,
"learning_rate": 3e-05,
"loss": 0.2889,
"step": 9603
},
{
"epoch": 6.81,
"learning_rate": 3e-05,
"loss": 0.2912,
"step": 9894
},
{
"epoch": 7.0,
"eval_accuracy": 0.7603886944818304,
"eval_loss": 0.43113213777542114,
"eval_runtime": 144.1381,
"eval_samples_per_second": 34.689,
"eval_steps_per_second": 2.172,
"step": 10164
},
{
"epoch": 7.0,
"eval_exact_match": 17.64,
"eval_f1": 24.23915265275915,
"eval_qa_bleu": 12.404229060978194,
"eval_qa_exact_match": 0.1648,
"eval_recite_bleu": 37.23348279932964,
"eval_recite_exact_match": 0.0102,
"step": 10164
},
{
"epoch": 7.01,
"learning_rate": 3e-05,
"loss": 0.2835,
"step": 10185
},
{
"epoch": 7.21,
"learning_rate": 3e-05,
"loss": 0.2131,
"step": 10476
},
{
"epoch": 7.42,
"learning_rate": 3e-05,
"loss": 0.2213,
"step": 10767
},
{
"epoch": 7.62,
"learning_rate": 3e-05,
"loss": 0.226,
"step": 11058
},
{
"epoch": 7.82,
"learning_rate": 3e-05,
"loss": 0.2282,
"step": 11349
},
{
"epoch": 8.0,
"eval_accuracy": 0.7626543741588157,
"eval_loss": 0.41835975646972656,
"eval_runtime": 144.0991,
"eval_samples_per_second": 34.698,
"eval_steps_per_second": 2.172,
"step": 11616
},
{
"epoch": 8.0,
"eval_exact_match": 17.78,
"eval_f1": 25.32648324184451,
"eval_qa_bleu": 12.460459536013193,
"eval_qa_exact_match": 0.1676,
"eval_recite_bleu": 40.14518456576004,
"eval_recite_exact_match": 0.0168,
"step": 11616
},
{
"epoch": 8.02,
"learning_rate": 3e-05,
"loss": 0.1645,
"step": 11640
},
{
"epoch": 8.22,
"learning_rate": 3e-05,
"loss": 0.1768,
"step": 11931
},
{
"epoch": 8.42,
"learning_rate": 3e-05,
"loss": 0.1851,
"step": 12222
},
{
"epoch": 8.62,
"learning_rate": 3e-05,
"loss": 0.1895,
"step": 12513
},
{
"epoch": 8.82,
"learning_rate": 3e-05,
"loss": 0.1905,
"step": 12804
},
{
"epoch": 9.0,
"eval_accuracy": 0.7639776581426648,
"eval_loss": 0.4136393666267395,
"eval_runtime": 146.8079,
"eval_samples_per_second": 34.058,
"eval_steps_per_second": 2.132,
"step": 13068
},
{
"epoch": 9.0,
"eval_exact_match": 18.58,
"eval_f1": 26.144983963782675,
"eval_qa_bleu": 12.723130940050861,
"eval_qa_exact_match": 0.1766,
"eval_recite_bleu": 44.36820281112791,
"eval_recite_exact_match": 0.0212,
"step": 13068
},
{
"epoch": 9.02,
"learning_rate": 3e-05,
"loss": 0.1919,
"step": 13095
},
{
"epoch": 9.22,
"learning_rate": 3e-05,
"loss": 0.156,
"step": 13386
},
{
"epoch": 9.42,
"learning_rate": 3e-05,
"loss": 0.158,
"step": 13677
},
{
"epoch": 9.62,
"learning_rate": 3e-05,
"loss": 0.1656,
"step": 13968
},
{
"epoch": 9.82,
"learning_rate": 3e-05,
"loss": 0.1687,
"step": 14259
},
{
"epoch": 10.0,
"eval_accuracy": 0.7647776581426649,
"eval_loss": 0.41753506660461426,
"eval_runtime": 149.0935,
"eval_samples_per_second": 33.536,
"eval_steps_per_second": 2.099,
"step": 14520
},
{
"epoch": 10.0,
"eval_exact_match": 19.8,
"eval_f1": 27.235079497903637,
"eval_qa_bleu": 13.874335090146856,
"eval_qa_exact_match": 0.1886,
"eval_recite_bleu": 46.53984717815193,
"eval_recite_exact_match": 0.0276,
"step": 14520
},
{
"epoch": 10.02,
"learning_rate": 3e-05,
"loss": 0.1684,
"step": 14550
},
{
"epoch": 10.22,
"learning_rate": 3e-05,
"loss": 0.1396,
"step": 14841
},
{
"epoch": 10.42,
"learning_rate": 3e-05,
"loss": 0.1457,
"step": 15132
},
{
"epoch": 10.62,
"learning_rate": 3e-05,
"loss": 0.1542,
"step": 15423
},
{
"epoch": 10.82,
"learning_rate": 3e-05,
"loss": 0.1553,
"step": 15714
},
{
"epoch": 11.0,
"eval_accuracy": 0.7651154777927321,
"eval_loss": 0.4211733937263489,
"eval_runtime": 150.5664,
"eval_samples_per_second": 33.208,
"eval_steps_per_second": 2.079,
"step": 15972
},
{
"epoch": 11.0,
"eval_exact_match": 19.1,
"eval_f1": 26.696266939458862,
"eval_qa_bleu": 14.962573389382232,
"eval_qa_exact_match": 0.1814,
"eval_recite_bleu": 46.889182609591046,
"eval_recite_exact_match": 0.032,
"step": 15972
},
{
"epoch": 11.02,
"learning_rate": 3e-05,
"loss": 0.1549,
"step": 16005
},
{
"epoch": 11.22,
"learning_rate": 3e-05,
"loss": 0.1335,
"step": 16296
},
{
"epoch": 11.42,
"learning_rate": 3e-05,
"loss": 0.1381,
"step": 16587
},
{
"epoch": 11.62,
"learning_rate": 3e-05,
"loss": 0.143,
"step": 16878
},
{
"epoch": 11.82,
"learning_rate": 3e-05,
"loss": 0.1447,
"step": 17169
},
{
"epoch": 12.0,
"eval_accuracy": 0.7653222072678331,
"eval_loss": 0.4283375144004822,
"eval_runtime": 152.1173,
"eval_samples_per_second": 32.869,
"eval_steps_per_second": 2.058,
"step": 17424
},
{
"epoch": 12.0,
"eval_exact_match": 19.7,
"eval_f1": 27.086380949947664,
"eval_qa_bleu": 13.161442853987554,
"eval_qa_exact_match": 0.1884,
"eval_recite_bleu": 48.94647260503517,
"eval_recite_exact_match": 0.0318,
"step": 17424
},
{
"epoch": 12.02,
"learning_rate": 3e-05,
"loss": 0.1449,
"step": 17460
},
{
"epoch": 12.23,
"learning_rate": 3e-05,
"loss": 0.1267,
"step": 17751
},
{
"epoch": 12.43,
"learning_rate": 3e-05,
"loss": 0.131,
"step": 18042
},
{
"epoch": 12.63,
"learning_rate": 3e-05,
"loss": 0.1366,
"step": 18333
},
{
"epoch": 12.83,
"learning_rate": 3e-05,
"loss": 0.1388,
"step": 18624
},
{
"epoch": 13.0,
"eval_accuracy": 0.7655897711978465,
"eval_loss": 0.4287155270576477,
"eval_runtime": 152.6887,
"eval_samples_per_second": 32.746,
"eval_steps_per_second": 2.05,
"step": 18876
},
{
"epoch": 13.0,
"eval_exact_match": 20.12,
"eval_f1": 27.673234730738653,
"eval_qa_bleu": 14.175044837001105,
"eval_qa_exact_match": 0.191,
"eval_recite_bleu": 49.79523539787241,
"eval_recite_exact_match": 0.0446,
"step": 18876
},
{
"epoch": 13.03,
"learning_rate": 3e-05,
"loss": 0.1393,
"step": 18915
},
{
"epoch": 13.23,
"learning_rate": 3e-05,
"loss": 0.1243,
"step": 19206
},
{
"epoch": 13.43,
"learning_rate": 3e-05,
"loss": 0.1277,
"step": 19497
},
{
"epoch": 13.63,
"learning_rate": 3e-05,
"loss": 0.1324,
"step": 19788
},
{
"epoch": 13.83,
"learning_rate": 3e-05,
"loss": 0.1329,
"step": 20079
},
{
"epoch": 14.0,
"eval_accuracy": 0.7656621803499327,
"eval_loss": 0.4349338412284851,
"eval_runtime": 153.0165,
"eval_samples_per_second": 32.676,
"eval_steps_per_second": 2.046,
"step": 20328
},
{
"epoch": 14.0,
"eval_exact_match": 19.74,
"eval_f1": 27.539494437309155,
"eval_qa_bleu": 14.653456371114471,
"eval_qa_exact_match": 0.1874,
"eval_recite_bleu": 49.667204132453634,
"eval_recite_exact_match": 0.0442,
"step": 20328
},
{
"epoch": 14.03,
"learning_rate": 3e-05,
"loss": 0.1324,
"step": 20370
},
{
"epoch": 14.23,
"learning_rate": 3e-05,
"loss": 0.1191,
"step": 20661
},
{
"epoch": 14.43,
"learning_rate": 3e-05,
"loss": 0.1228,
"step": 20952
},
{
"epoch": 14.63,
"learning_rate": 3e-05,
"loss": 0.1265,
"step": 21243
},
{
"epoch": 14.83,
"learning_rate": 3e-05,
"loss": 0.1292,
"step": 21534
},
{
"epoch": 15.0,
"eval_accuracy": 0.7657300134589502,
"eval_loss": 0.43529966473579407,
"eval_runtime": 152.7159,
"eval_samples_per_second": 32.741,
"eval_steps_per_second": 2.05,
"step": 21780
},
{
"epoch": 15.0,
"eval_exact_match": 20.14,
"eval_f1": 28.059788808027182,
"eval_qa_bleu": 14.397818558683056,
"eval_qa_exact_match": 0.1928,
"eval_recite_bleu": 49.57947130892122,
"eval_recite_exact_match": 0.0444,
"step": 21780
},
{
"epoch": 15.03,
"learning_rate": 3e-05,
"loss": 0.1313,
"step": 21825
},
{
"epoch": 15.23,
"learning_rate": 3e-05,
"loss": 0.1185,
"step": 22116
},
{
"epoch": 15.43,
"learning_rate": 3e-05,
"loss": 0.1199,
"step": 22407
},
{
"epoch": 15.63,
"learning_rate": 3e-05,
"loss": 0.1226,
"step": 22698
},
{
"epoch": 15.83,
"learning_rate": 3e-05,
"loss": 0.1267,
"step": 22989
},
{
"epoch": 16.0,
"eval_accuracy": 0.7659200538358009,
"eval_loss": 0.438266783952713,
"eval_runtime": 153.2915,
"eval_samples_per_second": 32.618,
"eval_steps_per_second": 2.042,
"step": 23232
},
{
"epoch": 16.0,
"eval_exact_match": 20.64,
"eval_f1": 28.191361890575656,
"eval_qa_bleu": 14.695055084130074,
"eval_qa_exact_match": 0.1918,
"eval_recite_bleu": 50.268257039645256,
"eval_recite_exact_match": 0.0478,
"step": 23232
},
{
"epoch": 16.03,
"learning_rate": 3e-05,
"loss": 0.1133,
"step": 23280
},
{
"epoch": 16.23,
"learning_rate": 3e-05,
"loss": 0.1145,
"step": 23571
},
{
"epoch": 16.43,
"learning_rate": 3e-05,
"loss": 0.1174,
"step": 23862
},
{
"epoch": 16.63,
"learning_rate": 3e-05,
"loss": 0.1218,
"step": 24153
},
{
"epoch": 16.83,
"learning_rate": 3e-05,
"loss": 0.1251,
"step": 24444
},
{
"epoch": 17.0,
"eval_accuracy": 0.7660632570659489,
"eval_loss": 0.4416392147541046,
"eval_runtime": 148.1371,
"eval_samples_per_second": 33.753,
"eval_steps_per_second": 2.113,
"step": 24684
},
{
"epoch": 17.0,
"eval_exact_match": 20.62,
"eval_f1": 28.101175315458292,
"eval_qa_bleu": 14.458032503572724,
"eval_qa_exact_match": 0.1966,
"eval_recite_bleu": 51.2644528074332,
"eval_recite_exact_match": 0.0496,
"step": 24684
},
{
"epoch": 17.04,
"learning_rate": 3e-05,
"loss": 0.1227,
"step": 24735
},
{
"epoch": 17.24,
"learning_rate": 3e-05,
"loss": 0.1117,
"step": 25026
},
{
"epoch": 17.44,
"learning_rate": 3e-05,
"loss": 0.1163,
"step": 25317
},
{
"epoch": 17.64,
"learning_rate": 3e-05,
"loss": 0.1209,
"step": 25608
},
{
"epoch": 17.84,
"learning_rate": 3e-05,
"loss": 0.1201,
"step": 25899
},
{
"epoch": 18.0,
"eval_accuracy": 0.7658912516823688,
"eval_loss": 0.4466552734375,
"eval_runtime": 149.6031,
"eval_samples_per_second": 33.422,
"eval_steps_per_second": 2.092,
"step": 26136
},
{
"epoch": 18.0,
"eval_exact_match": 19.72,
"eval_f1": 27.705882516354148,
"eval_qa_bleu": 13.41419171329321,
"eval_qa_exact_match": 0.189,
"eval_recite_bleu": 49.4786180883858,
"eval_recite_exact_match": 0.0434,
"step": 26136
},
{
"epoch": 18.04,
"learning_rate": 3e-05,
"loss": 0.1215,
"step": 26190
},
{
"epoch": 18.24,
"learning_rate": 3e-05,
"loss": 0.1111,
"step": 26481
},
{
"epoch": 18.44,
"learning_rate": 3e-05,
"loss": 0.1164,
"step": 26772
},
{
"epoch": 18.64,
"learning_rate": 3e-05,
"loss": 0.1162,
"step": 27063
},
{
"epoch": 18.84,
"learning_rate": 3e-05,
"loss": 0.1186,
"step": 27354
},
{
"epoch": 19.0,
"eval_accuracy": 0.7659983849259758,
"eval_loss": 0.4507902264595032,
"eval_runtime": 151.1093,
"eval_samples_per_second": 33.089,
"eval_steps_per_second": 2.071,
"step": 27588
},
{
"epoch": 19.0,
"eval_exact_match": 20.78,
"eval_f1": 28.407049206780048,
"eval_qa_bleu": 14.348972564948328,
"eval_qa_exact_match": 0.195,
"eval_recite_bleu": 50.89618814085128,
"eval_recite_exact_match": 0.0544,
"step": 27588
},
{
"epoch": 19.04,
"learning_rate": 3e-05,
"loss": 0.1198,
"step": 27645
},
{
"epoch": 19.24,
"learning_rate": 3e-05,
"loss": 0.109,
"step": 27936
},
{
"epoch": 19.44,
"learning_rate": 3e-05,
"loss": 0.1136,
"step": 28227
},
{
"epoch": 19.64,
"learning_rate": 3e-05,
"loss": 0.1153,
"step": 28518
},
{
"epoch": 19.84,
"learning_rate": 3e-05,
"loss": 0.1176,
"step": 28809
},
{
"epoch": 20.0,
"eval_accuracy": 0.7660672947510094,
"eval_loss": 0.4521976709365845,
"eval_runtime": 152.6418,
"eval_samples_per_second": 32.756,
"eval_steps_per_second": 2.051,
"step": 29040
},
{
"epoch": 20.0,
"eval_exact_match": 20.7,
"eval_f1": 28.598247849322437,
"eval_qa_bleu": 14.395309016288971,
"eval_qa_exact_match": 0.1954,
"eval_recite_bleu": 51.41424385892475,
"eval_recite_exact_match": 0.0504,
"step": 29040
},
{
"epoch": 20.0,
"step": 29040,
"total_flos": 1.326010952896512e+18,
"train_loss": 0.023505237591824914,
"train_runtime": 14139.9896,
"train_samples_per_second": 32.849,
"train_steps_per_second": 2.054
}
],
"logging_steps": 291,
"max_steps": 29040,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 1.326010952896512e+18,
"trial_name": null,
"trial_params": null
}