{
  "best_metric": 0.75,
  "best_model_checkpoint": "Swin-DMAE-H-DA-REVAL-80\\checkpoint-546",
  "epoch": 77.36263736263736,
  "eval_steps": 500,
  "global_step": 1760,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.44, "learning_rate": 2.840909090909091e-06, "loss": 1.6093, "step": 10},
    {"epoch": 0.88, "learning_rate": 5.681818181818182e-06, "loss": 1.608, "step": 20},
    {"epoch": 0.97, "eval_accuracy": 0.25, "eval_loss": 1.6090906858444214, "eval_runtime": 0.9532, "eval_samples_per_second": 54.552, "eval_steps_per_second": 4.196, "step": 22},
    {"epoch": 1.32, "learning_rate": 8.522727272727273e-06, "loss": 1.6038, "step": 30},
    {"epoch": 1.76, "learning_rate": 1.1363636363636365e-05, "loss": 1.5899, "step": 40},
    {"epoch": 1.98, "eval_accuracy": 0.19230769230769232, "eval_loss": 1.5960006713867188, "eval_runtime": 0.6772, "eval_samples_per_second": 76.791, "eval_steps_per_second": 5.907, "step": 45},
    {"epoch": 2.2, "learning_rate": 1.4204545454545456e-05, "loss": 1.5458, "step": 50},
    {"epoch": 2.64, "learning_rate": 1.7045454545454546e-05, "loss": 1.4759, "step": 60},
    {"epoch": 2.99, "eval_accuracy": 0.34615384615384615, "eval_loss": 1.4429875612258911, "eval_runtime": 0.6507, "eval_samples_per_second": 79.919, "eval_steps_per_second": 6.148, "step": 68},
    {"epoch": 3.08, "learning_rate": 1.9886363636363638e-05, "loss": 1.3432, "step": 70},
    {"epoch": 3.52, "learning_rate": 2.272727272727273e-05, "loss": 1.2227, "step": 80},
    {"epoch": 3.96, "learning_rate": 2.5568181818181817e-05, "loss": 1.1012, "step": 90},
    {"epoch": 4.0, "eval_accuracy": 0.5192307692307693, "eval_loss": 1.3212602138519287, "eval_runtime": 0.6366, "eval_samples_per_second": 81.678, "eval_steps_per_second": 6.283, "step": 91},
    {"epoch": 4.4, "learning_rate": 2.8409090909090912e-05, "loss": 0.9932, "step": 100},
    {"epoch": 4.84, "learning_rate": 3.125e-05, "loss": 0.8965, "step": 110},
    {"epoch": 4.97, "eval_accuracy": 0.4230769230769231, "eval_loss": 1.1938039064407349, "eval_runtime": 0.7082, "eval_samples_per_second": 73.43, "eval_steps_per_second": 5.648, "step": 113},
    {"epoch": 5.27, "learning_rate": 3.409090909090909e-05, "loss": 0.8385, "step": 120},
    {"epoch": 5.71, "learning_rate": 3.6931818181818184e-05, "loss": 0.7214, "step": 130},
    {"epoch": 5.98, "eval_accuracy": 0.46153846153846156, "eval_loss": 1.1870416402816772, "eval_runtime": 0.6792, "eval_samples_per_second": 76.566, "eval_steps_per_second": 5.89, "step": 136},
    {"epoch": 6.15, "learning_rate": 3.9772727272727275e-05, "loss": 0.6341, "step": 140},
    {"epoch": 6.59, "learning_rate": 4.261363636363637e-05, "loss": 0.6757, "step": 150},
    {"epoch": 6.99, "eval_accuracy": 0.5, "eval_loss": 1.2117116451263428, "eval_runtime": 0.6842, "eval_samples_per_second": 76.005, "eval_steps_per_second": 5.847, "step": 159},
    {"epoch": 7.03, "learning_rate": 4.545454545454546e-05, "loss": 0.5533, "step": 160},
    {"epoch": 7.47, "learning_rate": 4.829545454545455e-05, "loss": 0.5691, "step": 170},
    {"epoch": 7.91, "learning_rate": 4.9873737373737375e-05, "loss": 0.5529, "step": 180},
    {"epoch": 8.0, "eval_accuracy": 0.46153846153846156, "eval_loss": 1.1975771188735962, "eval_runtime": 0.6381, "eval_samples_per_second": 81.486, "eval_steps_per_second": 6.268, "step": 182},
    {"epoch": 8.35, "learning_rate": 4.955808080808081e-05, "loss": 0.4971, "step": 190},
    {"epoch": 8.79, "learning_rate": 4.9242424242424245e-05, "loss": 0.5279, "step": 200},
    {"epoch": 8.97, "eval_accuracy": 0.5192307692307693, "eval_loss": 1.1249598264694214, "eval_runtime": 0.6211, "eval_samples_per_second": 83.717, "eval_steps_per_second": 6.44, "step": 204},
    {"epoch": 9.23, "learning_rate": 4.892676767676767e-05, "loss": 0.5037, "step": 210},
    {"epoch": 9.67, "learning_rate": 4.8611111111111115e-05, "loss": 0.4701, "step": 220},
    {"epoch": 9.98, "eval_accuracy": 0.5576923076923077, "eval_loss": 1.099881887435913, "eval_runtime": 0.6342, "eval_samples_per_second": 81.999, "eval_steps_per_second": 6.308, "step": 227},
    {"epoch": 10.11, "learning_rate": 4.829545454545455e-05, "loss": 0.3906, "step": 230},
    {"epoch": 10.55, "learning_rate": 4.797979797979798e-05, "loss": 0.3812, "step": 240},
    {"epoch": 10.99, "learning_rate": 4.7664141414141413e-05, "loss": 0.3721, "step": 250},
    {"epoch": 10.99, "eval_accuracy": 0.6538461538461539, "eval_loss": 0.7842198014259338, "eval_runtime": 0.6467, "eval_samples_per_second": 80.414, "eval_steps_per_second": 6.186, "step": 250},
    {"epoch": 11.43, "learning_rate": 4.7348484848484855e-05, "loss": 0.327, "step": 260},
    {"epoch": 11.87, "learning_rate": 4.7032828282828283e-05, "loss": 0.3631, "step": 270},
    {"epoch": 12.0, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.1728084087371826, "eval_runtime": 0.7257, "eval_samples_per_second": 71.658, "eval_steps_per_second": 5.512, "step": 273},
    {"epoch": 12.31, "learning_rate": 4.671717171717172e-05, "loss": 0.335, "step": 280},
    {"epoch": 12.75, "learning_rate": 4.6401515151515154e-05, "loss": 0.3384, "step": 290},
    {"epoch": 12.97, "eval_accuracy": 0.5769230769230769, "eval_loss": 1.241263508796692, "eval_runtime": 0.7252, "eval_samples_per_second": 71.706, "eval_steps_per_second": 5.516, "step": 295},
    {"epoch": 13.19, "learning_rate": 4.608585858585859e-05, "loss": 0.2861, "step": 300},
    {"epoch": 13.63, "learning_rate": 4.5770202020202024e-05, "loss": 0.2531, "step": 310},
    {"epoch": 13.98, "eval_accuracy": 0.6346153846153846, "eval_loss": 0.9144014716148376, "eval_runtime": 0.6357, "eval_samples_per_second": 81.806, "eval_steps_per_second": 6.293, "step": 318},
    {"epoch": 14.07, "learning_rate": 4.545454545454546e-05, "loss": 0.2761, "step": 320},
    {"epoch": 14.51, "learning_rate": 4.5138888888888894e-05, "loss": 0.2721, "step": 330},
    {"epoch": 14.95, "learning_rate": 4.482323232323233e-05, "loss": 0.2753, "step": 340},
    {"epoch": 14.99, "eval_accuracy": 0.6923076923076923, "eval_loss": 0.8958693146705627, "eval_runtime": 0.6421, "eval_samples_per_second": 80.979, "eval_steps_per_second": 6.229, "step": 341},
    {"epoch": 15.38, "learning_rate": 4.450757575757576e-05, "loss": 0.2275, "step": 350},
    {"epoch": 15.82, "learning_rate": 4.41919191919192e-05, "loss": 0.2611, "step": 360},
    {"epoch": 16.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.139877438545227, "eval_runtime": 0.7657, "eval_samples_per_second": 67.913, "eval_steps_per_second": 5.224, "step": 364},
    {"epoch": 16.26, "learning_rate": 4.387626262626263e-05, "loss": 0.2614, "step": 370},
    {"epoch": 16.7, "learning_rate": 4.356060606060606e-05, "loss": 0.2072, "step": 380},
    {"epoch": 16.97, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.0731658935546875, "eval_runtime": 0.6391, "eval_samples_per_second": 81.359, "eval_steps_per_second": 6.258, "step": 386},
    {"epoch": 17.14, "learning_rate": 4.32449494949495e-05, "loss": 0.1875, "step": 390},
    {"epoch": 17.58, "learning_rate": 4.292929292929293e-05, "loss": 0.2532, "step": 400},
    {"epoch": 17.98, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.1921563148498535, "eval_runtime": 0.7727, "eval_samples_per_second": 67.298, "eval_steps_per_second": 5.177, "step": 409},
    {"epoch": 18.02, "learning_rate": 4.261363636363637e-05, "loss": 0.1789, "step": 410},
    {"epoch": 18.46, "learning_rate": 4.2297979797979795e-05, "loss": 0.1653, "step": 420},
    {"epoch": 18.9, "learning_rate": 4.198232323232324e-05, "loss": 0.1633, "step": 430},
    {"epoch": 18.99, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.059952974319458, "eval_runtime": 0.6427, "eval_samples_per_second": 80.914, "eval_steps_per_second": 6.224, "step": 432},
    {"epoch": 19.34, "learning_rate": 4.166666666666667e-05, "loss": 0.1899, "step": 440},
    {"epoch": 19.78, "learning_rate": 4.13510101010101e-05, "loss": 0.1946, "step": 450},
    {"epoch": 20.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.228935718536377, "eval_runtime": 0.7167, "eval_samples_per_second": 72.558, "eval_steps_per_second": 5.581, "step": 455},
    {"epoch": 20.22, "learning_rate": 4.1035353535353535e-05, "loss": 0.1413, "step": 460},
    {"epoch": 20.66, "learning_rate": 4.071969696969698e-05, "loss": 0.2214, "step": 470},
    {"epoch": 20.97, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.3590692281723022, "eval_runtime": 0.6267, "eval_samples_per_second": 82.98, "eval_steps_per_second": 6.383, "step": 477},
    {"epoch": 21.1, "learning_rate": 4.0404040404040405e-05, "loss": 0.1925, "step": 480},
    {"epoch": 21.54, "learning_rate": 4.008838383838384e-05, "loss": 0.1842, "step": 490},
    {"epoch": 21.98, "learning_rate": 3.9772727272727275e-05, "loss": 0.1666, "step": 500},
    {"epoch": 21.98, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.0735695362091064, "eval_runtime": 0.6487, "eval_samples_per_second": 80.166, "eval_steps_per_second": 6.167, "step": 500},
    {"epoch": 22.42, "learning_rate": 3.945707070707071e-05, "loss": 0.1609, "step": 510},
    {"epoch": 22.86, "learning_rate": 3.9141414141414145e-05, "loss": 0.141, "step": 520},
    {"epoch": 22.99, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.031468391418457, "eval_runtime": 0.7467, "eval_samples_per_second": 69.643, "eval_steps_per_second": 5.357, "step": 523},
    {"epoch": 23.3, "learning_rate": 3.8825757575757574e-05, "loss": 0.1474, "step": 530},
    {"epoch": 23.74, "learning_rate": 3.8510101010101015e-05, "loss": 0.1275, "step": 540},
    {"epoch": 24.0, "eval_accuracy": 0.75, "eval_loss": 1.0766026973724365, "eval_runtime": 0.6561, "eval_samples_per_second": 79.25, "eval_steps_per_second": 6.096, "step": 546},
    {"epoch": 24.18, "learning_rate": 3.8194444444444444e-05, "loss": 0.1708, "step": 550},
    {"epoch": 24.62, "learning_rate": 3.787878787878788e-05, "loss": 0.136, "step": 560},
    {"epoch": 24.97, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.1796035766601562, "eval_runtime": 0.6882, "eval_samples_per_second": 75.564, "eval_steps_per_second": 5.813, "step": 568},
    {"epoch": 25.05, "learning_rate": 3.7563131313131314e-05, "loss": 0.1539, "step": 570},
    {"epoch": 25.49, "learning_rate": 3.724747474747475e-05, "loss": 0.129, "step": 580},
    {"epoch": 25.93, "learning_rate": 3.6931818181818184e-05, "loss": 0.1402, "step": 590},
    {"epoch": 25.98, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.0338743925094604, "eval_runtime": 0.6301, "eval_samples_per_second": 82.521, "eval_steps_per_second": 6.348, "step": 591},
    {"epoch": 26.37, "learning_rate": 3.661616161616162e-05, "loss": 0.1492, "step": 600},
    {"epoch": 26.81, "learning_rate": 3.6300505050505054e-05, "loss": 0.1336, "step": 610},
    {"epoch": 26.99, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.3445886373519897, "eval_runtime": 0.6437, "eval_samples_per_second": 80.789, "eval_steps_per_second": 6.215, "step": 614},
    {"epoch": 27.25, "learning_rate": 3.598484848484849e-05, "loss": 0.0881, "step": 620},
    {"epoch": 27.69, "learning_rate": 3.566919191919192e-05, "loss": 0.1218, "step": 630},
    {"epoch": 28.0, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.2967442274093628, "eval_runtime": 0.6371, "eval_samples_per_second": 81.614, "eval_steps_per_second": 6.278, "step": 637},
    {"epoch": 28.13, "learning_rate": 3.535353535353535e-05, "loss": 0.1095, "step": 640},
    {"epoch": 28.57, "learning_rate": 3.5037878787878794e-05, "loss": 0.1034, "step": 650},
    {"epoch": 28.97, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.5955091714859009, "eval_runtime": 0.6367, "eval_samples_per_second": 81.675, "eval_steps_per_second": 6.283, "step": 659},
    {"epoch": 29.01, "learning_rate": 3.472222222222222e-05, "loss": 0.1467, "step": 660},
    {"epoch": 29.45, "learning_rate": 3.440656565656566e-05, "loss": 0.1292, "step": 670},
    {"epoch": 29.89, "learning_rate": 3.409090909090909e-05, "loss": 0.1196, "step": 680},
    {"epoch": 29.98, "eval_accuracy": 0.5769230769230769, "eval_loss": 1.5720566511154175, "eval_runtime": 0.6382, "eval_samples_per_second": 81.484, "eval_steps_per_second": 6.268, "step": 682},
    {"epoch": 30.33, "learning_rate": 3.377525252525253e-05, "loss": 0.0938, "step": 690},
    {"epoch": 30.77, "learning_rate": 3.345959595959596e-05, "loss": 0.1368, "step": 700},
    {"epoch": 30.99, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.8208184242248535, "eval_runtime": 0.6327, "eval_samples_per_second": 82.194, "eval_steps_per_second": 6.323, "step": 705},
    {"epoch": 31.21, "learning_rate": 3.314393939393939e-05, "loss": 0.1194, "step": 710},
    {"epoch": 31.65, "learning_rate": 3.282828282828283e-05, "loss": 0.1477, "step": 720},
    {"epoch": 32.0, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.4237351417541504, "eval_runtime": 0.6551, "eval_samples_per_second": 79.371, "eval_steps_per_second": 6.105, "step": 728},
    {"epoch": 32.09, "learning_rate": 3.251262626262627e-05, "loss": 0.1274, "step": 730},
    {"epoch": 32.53, "learning_rate": 3.2196969696969696e-05, "loss": 0.1198, "step": 740},
    {"epoch": 32.97, "learning_rate": 3.188131313131314e-05, "loss": 0.1299, "step": 750},
    {"epoch": 32.97, "eval_accuracy": 0.7115384615384616, "eval_loss": 1.4061400890350342, "eval_runtime": 0.6447, "eval_samples_per_second": 80.663, "eval_steps_per_second": 6.205, "step": 750},
    {"epoch": 33.41, "learning_rate": 3.1565656565656566e-05, "loss": 0.0806, "step": 760},
    {"epoch": 33.85, "learning_rate": 3.125e-05, "loss": 0.1111, "step": 770},
    {"epoch": 33.98, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.666426420211792, "eval_runtime": 0.6892, "eval_samples_per_second": 75.455, "eval_steps_per_second": 5.804, "step": 773},
    {"epoch": 34.29, "learning_rate": 3.0934343434343436e-05, "loss": 0.0773, "step": 780},
    {"epoch": 34.73, "learning_rate": 3.061868686868687e-05, "loss": 0.068, "step": 790},
    {"epoch": 34.99, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.7432496547698975, "eval_runtime": 0.7027, "eval_samples_per_second": 74.004, "eval_steps_per_second": 5.693, "step": 796},
    {"epoch": 35.16, "learning_rate": 3.0303030303030306e-05, "loss": 0.1432, "step": 800},
    {"epoch": 35.6, "learning_rate": 2.9987373737373737e-05, "loss": 0.1142, "step": 810},
    {"epoch": 36.0, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.4517791271209717, "eval_runtime": 0.6291, "eval_samples_per_second": 82.652, "eval_steps_per_second": 6.358, "step": 819},
    {"epoch": 36.04, "learning_rate": 2.9671717171717172e-05, "loss": 0.1228, "step": 820},
    {"epoch": 36.48, "learning_rate": 2.935606060606061e-05, "loss": 0.0968, "step": 830},
    {"epoch": 36.92, "learning_rate": 2.904040404040404e-05, "loss": 0.1258, "step": 840},
    {"epoch": 36.97, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.7216651439666748, "eval_runtime": 0.7027, "eval_samples_per_second": 74.003, "eval_steps_per_second": 5.693, "step": 841},
    {"epoch": 37.36, "learning_rate": 2.8724747474747477e-05, "loss": 0.0971, "step": 850},
    {"epoch": 37.8, "learning_rate": 2.8409090909090912e-05, "loss": 0.1055, "step": 860},
    {"epoch": 37.98, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.6348490715026855, "eval_runtime": 0.6862, "eval_samples_per_second": 75.785, "eval_steps_per_second": 5.83, "step": 864},
    {"epoch": 38.24, "learning_rate": 2.8093434343434344e-05, "loss": 0.1109, "step": 870},
    {"epoch": 38.68, "learning_rate": 2.777777777777778e-05, "loss": 0.1049, "step": 880},
    {"epoch": 38.99, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.837777853012085, "eval_runtime": 0.7382, "eval_samples_per_second": 70.445, "eval_steps_per_second": 5.419, "step": 887},
    {"epoch": 39.12, "learning_rate": 2.746212121212121e-05, "loss": 0.0726, "step": 890},
    {"epoch": 39.56, "learning_rate": 2.714646464646465e-05, "loss": 0.0672, "step": 900},
    {"epoch": 40.0, "learning_rate": 2.6830808080808084e-05, "loss": 0.0822, "step": 910},
    {"epoch": 40.0, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.6760356426239014, "eval_runtime": 0.6697, "eval_samples_per_second": 77.651, "eval_steps_per_second": 5.973, "step": 910},
    {"epoch": 40.44, "learning_rate": 2.6515151515151516e-05, "loss": 0.0798, "step": 920},
    {"epoch": 40.88, "learning_rate": 2.619949494949495e-05, "loss": 0.1114, "step": 930},
    {"epoch": 40.97, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.7310110330581665, "eval_runtime": 0.7447, "eval_samples_per_second": 69.829, "eval_steps_per_second": 5.371, "step": 932},
    {"epoch": 41.32, "learning_rate": 2.5883838383838382e-05, "loss": 0.0942, "step": 940},
    {"epoch": 41.76, "learning_rate": 2.5568181818181817e-05, "loss": 0.0704, "step": 950},
    {"epoch": 41.98, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.7105393409729004, "eval_runtime": 0.6411, "eval_samples_per_second": 81.105, "eval_steps_per_second": 6.239, "step": 955},
    {"epoch": 42.2, "learning_rate": 2.5252525252525256e-05, "loss": 0.115, "step": 960},
    {"epoch": 42.64, "learning_rate": 2.4936868686868688e-05, "loss": 0.0983, "step": 970},
    {"epoch": 42.99, "eval_accuracy": 0.5961538461538461, "eval_loss": 1.8320040702819824, "eval_runtime": 0.6571, "eval_samples_per_second": 79.13, "eval_steps_per_second": 6.087, "step": 978},
    {"epoch": 43.08, "learning_rate": 2.4621212121212123e-05, "loss": 0.0707, "step": 980},
    {"epoch": 43.52, "learning_rate": 2.4305555555555558e-05, "loss": 0.0665, "step": 990},
    {"epoch": 43.96, "learning_rate": 2.398989898989899e-05, "loss": 0.0909, "step": 1000},
    {"epoch": 44.0, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.563212275505066, "eval_runtime": 0.7357, "eval_samples_per_second": 70.683, "eval_steps_per_second": 5.437, "step": 1001},
    {"epoch": 44.4, "learning_rate": 2.3674242424242428e-05, "loss": 0.1091, "step": 1010},
    {"epoch": 44.84, "learning_rate": 2.335858585858586e-05, "loss": 0.0991, "step": 1020},
    {"epoch": 44.97, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.7606291770935059, "eval_runtime": 0.6397, "eval_samples_per_second": 81.294, "eval_steps_per_second": 6.253, "step": 1023},
    {"epoch": 45.27, "learning_rate": 2.3042929292929294e-05, "loss": 0.0821, "step": 1030},
    {"epoch": 45.71, "learning_rate": 2.272727272727273e-05, "loss": 0.0658, "step": 1040},
    {"epoch": 45.98, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.592665433883667, "eval_runtime": 0.6456, "eval_samples_per_second": 80.539, "eval_steps_per_second": 6.195, "step": 1046},
    {"epoch": 46.15, "learning_rate": 2.2411616161616164e-05, "loss": 0.0869, "step": 1050},
    {"epoch": 46.59, "learning_rate": 2.20959595959596e-05, "loss": 0.0412, "step": 1060},
    {"epoch": 46.99, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.4660203456878662, "eval_runtime": 0.6371, "eval_samples_per_second": 81.614, "eval_steps_per_second": 6.278, "step": 1069},
    {"epoch": 47.03, "learning_rate": 2.178030303030303e-05, "loss": 0.084, "step": 1070},
    {"epoch": 47.47, "learning_rate": 2.1464646464646466e-05, "loss": 0.0618, "step": 1080},
    {"epoch": 47.91, "learning_rate": 2.1148989898989898e-05, "loss": 0.0919, "step": 1090},
    {"epoch": 48.0, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.3294285535812378, "eval_runtime": 0.6842, "eval_samples_per_second": 76.006, "eval_steps_per_second": 5.847, "step": 1092},
    {"epoch": 48.35, "learning_rate": 2.0833333333333336e-05, "loss": 0.0733, "step": 1100},
    {"epoch": 48.79, "learning_rate": 2.0517676767676768e-05, "loss": 0.0726, "step": 1110},
    {"epoch": 48.97, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.5551464557647705, "eval_runtime": 0.6647, "eval_samples_per_second": 78.236, "eval_steps_per_second": 6.018, "step": 1114},
    {"epoch": 49.23, "learning_rate": 2.0202020202020203e-05, "loss": 0.0662, "step": 1120},
    {"epoch": 49.67, "learning_rate": 1.9886363636363638e-05, "loss": 0.0554, "step": 1130},
    {"epoch": 49.98, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.7157161235809326, "eval_runtime": 0.6597, "eval_samples_per_second": 78.829, "eval_steps_per_second": 6.064, "step": 1137},
    {"epoch": 50.11, "learning_rate": 1.9570707070707073e-05, "loss": 0.0697, "step": 1140},
    {"epoch": 50.55, "learning_rate": 1.9255050505050508e-05, "loss": 0.1054, "step": 1150},
    {"epoch": 50.99, "learning_rate": 1.893939393939394e-05, "loss": 0.0585, "step": 1160},
    {"epoch": 50.99, "eval_accuracy": 0.5961538461538461, "eval_loss": 1.8279716968536377, "eval_runtime": 0.6462, "eval_samples_per_second": 80.475, "eval_steps_per_second": 6.19, "step": 1160},
    {"epoch": 51.43, "learning_rate": 1.8623737373737374e-05, "loss": 0.0821, "step": 1170},
    {"epoch": 51.87, "learning_rate": 1.830808080808081e-05, "loss": 0.0607, "step": 1180},
    {"epoch": 52.0, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.6141858100891113, "eval_runtime": 0.6587, "eval_samples_per_second": 78.949, "eval_steps_per_second": 6.073, "step": 1183},
    {"epoch": 52.31, "learning_rate": 1.7992424242424244e-05, "loss": 0.0444, "step": 1190},
    {"epoch": 52.75, "learning_rate": 1.7676767676767676e-05, "loss": 0.0719, "step": 1200},
    {"epoch": 52.97, "eval_accuracy": 0.5961538461538461, "eval_loss": 1.992350459098816, "eval_runtime": 0.6432, "eval_samples_per_second": 80.851, "eval_steps_per_second": 6.219, "step": 1205},
    {"epoch": 53.19, "learning_rate": 1.736111111111111e-05, "loss": 0.1031, "step": 1210},
    {"epoch": 53.63, "learning_rate": 1.7045454545454546e-05, "loss": 0.0877, "step": 1220},
    {"epoch": 53.98, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.7806010246276855, "eval_runtime": 0.6742, "eval_samples_per_second": 77.134, "eval_steps_per_second": 5.933, "step": 1228},
    {"epoch": 54.07, "learning_rate": 1.672979797979798e-05, "loss": 0.0783, "step": 1230},
    {"epoch": 54.51, "learning_rate": 1.6414141414141416e-05, "loss": 0.0664, "step": 1240},
    {"epoch": 54.95, "learning_rate": 1.6098484848484848e-05, "loss": 0.0743, "step": 1250},
    {"epoch": 54.99, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.9819928407669067, "eval_runtime": 0.7032, "eval_samples_per_second": 73.95, "eval_steps_per_second": 5.688, "step": 1251},
    {"epoch": 55.38, "learning_rate": 1.5782828282828283e-05, "loss": 0.0652, "step": 1260},
    {"epoch": 55.82, "learning_rate": 1.5467171717171718e-05, "loss": 0.0464, "step": 1270},
    {"epoch": 56.0, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.944918155670166, "eval_runtime": 0.6432, "eval_samples_per_second": 80.851, "eval_steps_per_second": 6.219, "step": 1274},
    {"epoch": 56.26, "learning_rate": 1.5151515151515153e-05, "loss": 0.0522, "step": 1280},
    {"epoch": 56.7, "learning_rate": 1.4835858585858586e-05, "loss": 0.077, "step": 1290},
    {"epoch": 56.97, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.6825530529022217, "eval_runtime": 0.6687, "eval_samples_per_second": 77.768, "eval_steps_per_second": 5.982, "step": 1296},
    {"epoch": 57.14, "learning_rate": 1.452020202020202e-05, "loss": 0.0688, "step": 1300},
    {"epoch": 57.58, "learning_rate": 1.4204545454545456e-05, "loss": 0.073, "step": 1310},
    {"epoch": 57.98, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.7594443559646606, "eval_runtime": 0.6671, "eval_samples_per_second": 77.944, "eval_steps_per_second": 5.996, "step": 1319},
    {"epoch": 58.02, "learning_rate": 1.388888888888889e-05, "loss": 0.0488, "step": 1320},
    {"epoch": 58.46, "learning_rate": 1.3573232323232325e-05, "loss": 0.0568, "step": 1330},
    {"epoch": 58.9, "learning_rate": 1.3257575757575758e-05, "loss": 0.0623, "step": 1340},
    {"epoch": 58.99, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.8303287029266357, "eval_runtime": 0.6563, "eval_samples_per_second": 79.235, "eval_steps_per_second": 6.095, "step": 1342},
    {"epoch": 59.34, "learning_rate": 1.2941919191919191e-05, "loss": 0.0818, "step": 1350},
    {"epoch": 59.78, "learning_rate": 1.2626262626262628e-05, "loss": 0.0383, "step": 1360},
    {"epoch": 60.0, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.8124154806137085, "eval_runtime": 0.6437, "eval_samples_per_second": 80.789, "eval_steps_per_second": 6.215, "step": 1365},
    {"epoch": 60.22, "learning_rate": 1.2310606060606061e-05, "loss": 0.0486, "step": 1370},
    {"epoch": 60.66, "learning_rate": 1.1994949494949495e-05, "loss": 0.0526, "step": 1380},
    {"epoch": 60.97, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.8164315223693848, "eval_runtime": 0.6712, "eval_samples_per_second": 77.479, "eval_steps_per_second": 5.96, "step": 1387},
    {"epoch": 61.1, "learning_rate": 1.167929292929293e-05, "loss": 0.0676, "step": 1390},
    {"epoch": 61.54, "learning_rate": 1.1363636363636365e-05, "loss": 0.0482, "step": 1400},
    {"epoch": 61.98, "learning_rate": 1.10479797979798e-05, "loss": 0.0679, "step": 1410},
    {"epoch": 61.98, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.8585782051086426, "eval_runtime": 0.6812, "eval_samples_per_second": 76.341, "eval_steps_per_second": 5.872, "step": 1410},
    {"epoch": 62.42, "learning_rate": 1.0732323232323233e-05, "loss": 0.0791, "step": 1420},
    {"epoch": 62.86, "learning_rate": 1.0416666666666668e-05, "loss": 0.0625, "step": 1430},
    {"epoch": 62.99, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.9150481224060059, "eval_runtime": 0.7422, "eval_samples_per_second": 70.065, "eval_steps_per_second": 5.39, "step": 1433},
    {"epoch": 63.3, "learning_rate": 1.0101010101010101e-05, "loss": 0.0363, "step": 1440},
    {"epoch": 63.74, "learning_rate": 9.785353535353536e-06, "loss": 0.0482, "step": 1450},
    {"epoch": 64.0, "eval_accuracy": 0.6346153846153846, "eval_loss": 1.9622400999069214, "eval_runtime": 0.6337, "eval_samples_per_second": 82.064, "eval_steps_per_second": 6.313, "step": 1456},
    {"epoch": 64.18, "learning_rate": 9.46969696969697e-06, "loss": 0.071, "step": 1460},
    {"epoch": 64.62, "learning_rate": 9.154040404040405e-06, "loss": 0.0646, "step": 1470},
    {"epoch": 64.97, "eval_accuracy": 0.6153846153846154, "eval_loss": 1.947584629058838, "eval_runtime": 1.1818, "eval_samples_per_second": 44.001, "eval_steps_per_second": 3.385, "step": 1478},
    {"epoch": 65.05, "learning_rate": 8.838383838383838e-06, "loss": 0.0348, "step": 1480},
    {"epoch": 65.49, "learning_rate": 8.522727272727273e-06, "loss": 0.0363, "step": 1490},
    {"epoch": 65.93, "learning_rate": 8.207070707070708e-06, "loss": 0.0594, "step": 1500},
    {"epoch": 65.98, "eval_accuracy": 0.6923076923076923, "eval_loss": 1.5957955121994019, "eval_runtime": 0.6717, "eval_samples_per_second": 77.42, "eval_steps_per_second": 5.955, "step": 1501},
    {"epoch": 66.37, "learning_rate": 7.891414141414141e-06, "loss": 0.0375, "step": 1510},
    {"epoch": 66.81, "learning_rate": 7.5757575757575764e-06, "loss": 0.0568, "step": 1520},
    {"epoch": 66.99, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.8275247812271118, "eval_runtime": 0.7647, "eval_samples_per_second": 68.002, "eval_steps_per_second": 5.231, "step": 1524},
    {"epoch": 67.25, "learning_rate": 7.26010101010101e-06, "loss": 0.0443, "step": 1530},
    {"epoch": 67.69, "learning_rate": 6.944444444444445e-06, "loss": 0.0662, "step": 1540},
    {"epoch": 68.0, "eval_accuracy": 0.6730769230769231, "eval_loss": 1.757631778717041, "eval_runtime": 0.6531, "eval_samples_per_second": 79.615, "eval_steps_per_second": 6.124, "step": 1547},
    {"epoch": 68.13, "learning_rate": 6.628787878787879e-06, "loss": 0.0352, "step": 1550},
    {"epoch": 68.57, "learning_rate": 6.313131313131314e-06, "loss": 0.0428, "step": 1560},
    {"epoch": 68.97, "eval_accuracy": 0.6538461538461539, "eval_loss": 1.9324886798858643, "eval_runtime": 0.7317, "eval_samples_per_second": 71.07, "eval_steps_per_second": 5.467, "step": 1569},
    {"epoch": 69.01, "learning_rate": 5.997474747474747e-06, "loss": 0.0549, "step": 1570},
    {"epoch": 69.45, "learning_rate": 5.681818181818182e-06, "loss": 0.0571, "step": 1580},
    {
"epoch": 69.89,
|
|
"learning_rate": 5.3661616161616165e-06,
|
|
"loss": 0.0433,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 69.98,
|
|
"eval_accuracy": 0.6730769230769231,
|
|
"eval_loss": 1.8206470012664795,
|
|
"eval_runtime": 0.6331,
|
|
"eval_samples_per_second": 82.13,
|
|
"eval_steps_per_second": 6.318,
|
|
"step": 1592
|
|
},
|
|
{
|
|
"epoch": 70.33,
|
|
"learning_rate": 5.050505050505051e-06,
|
|
"loss": 0.0477,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 70.77,
|
|
"learning_rate": 4.734848484848485e-06,
|
|
"loss": 0.0511,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 70.99,
|
|
"eval_accuracy": 0.6538461538461539,
|
|
"eval_loss": 1.9029183387756348,
|
|
"eval_runtime": 0.6361,
|
|
"eval_samples_per_second": 81.743,
|
|
"eval_steps_per_second": 6.288,
|
|
"step": 1615
|
|
},
|
|
{
|
|
"epoch": 71.21,
|
|
"learning_rate": 4.419191919191919e-06,
|
|
"loss": 0.058,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 71.65,
|
|
"learning_rate": 4.103535353535354e-06,
|
|
"loss": 0.0502,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 72.0,
|
|
"eval_accuracy": 0.6538461538461539,
|
|
"eval_loss": 1.8820760250091553,
|
|
"eval_runtime": 0.6281,
|
|
"eval_samples_per_second": 82.784,
|
|
"eval_steps_per_second": 6.368,
|
|
"step": 1638
|
|
},
|
|
{
|
|
"epoch": 72.09,
|
|
"learning_rate": 3.7878787878787882e-06,
|
|
"loss": 0.0507,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 72.53,
|
|
"learning_rate": 3.4722222222222224e-06,
|
|
"loss": 0.0497,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 72.97,
|
|
"learning_rate": 3.156565656565657e-06,
|
|
"loss": 0.0544,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 72.97,
|
|
"eval_accuracy": 0.6538461538461539,
|
|
"eval_loss": 1.9534863233566284,
|
|
"eval_runtime": 0.6302,
|
|
"eval_samples_per_second": 82.519,
|
|
"eval_steps_per_second": 6.348,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 73.41,
|
|
"learning_rate": 2.840909090909091e-06,
|
|
"loss": 0.0406,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 73.85,
|
|
"learning_rate": 2.5252525252525253e-06,
|
|
"loss": 0.0399,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 73.98,
|
|
"eval_accuracy": 0.6538461538461539,
|
|
"eval_loss": 1.8454902172088623,
|
|
"eval_runtime": 0.6792,
|
|
"eval_samples_per_second": 76.566,
|
|
"eval_steps_per_second": 5.89,
|
|
"step": 1683
|
|
},
|
|
{
|
|
"epoch": 74.29,
|
|
"learning_rate": 2.2095959595959595e-06,
|
|
"loss": 0.0517,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 74.73,
|
|
"learning_rate": 1.8939393939393941e-06,
|
|
"loss": 0.0561,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 74.99,
|
|
"eval_accuracy": 0.6538461538461539,
|
|
"eval_loss": 1.8289686441421509,
|
|
"eval_runtime": 0.6441,
|
|
"eval_samples_per_second": 80.727,
|
|
"eval_steps_per_second": 6.21,
|
|
"step": 1706
|
|
},
|
|
{
|
|
"epoch": 75.16,
|
|
"learning_rate": 1.5782828282828285e-06,
|
|
"loss": 0.0487,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 75.6,
|
|
"learning_rate": 1.2626262626262627e-06,
|
|
"loss": 0.041,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 76.0,
|
|
"eval_accuracy": 0.6538461538461539,
|
|
"eval_loss": 1.8427259922027588,
|
|
"eval_runtime": 0.6382,
|
|
"eval_samples_per_second": 81.484,
|
|
"eval_steps_per_second": 6.268,
|
|
"step": 1729
|
|
},
|
|
{
|
|
"epoch": 76.04,
|
|
"learning_rate": 9.469696969696971e-07,
|
|
"loss": 0.0387,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 76.48,
|
|
"learning_rate": 6.313131313131313e-07,
|
|
"loss": 0.0612,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 76.92,
|
|
"learning_rate": 3.1565656565656567e-07,
|
|
"loss": 0.0582,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 76.97,
|
|
"eval_accuracy": 0.6538461538461539,
|
|
"eval_loss": 1.8591234683990479,
|
|
"eval_runtime": 0.6637,
|
|
"eval_samples_per_second": 78.354,
|
|
"eval_steps_per_second": 6.027,
|
|
"step": 1751
|
|
},
|
|
{
|
|
"epoch": 77.36,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.0315,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 77.36,
|
|
"eval_accuracy": 0.6538461538461539,
|
|
"eval_loss": 1.8611557483673096,
|
|
"eval_runtime": 1.1148,
|
|
"eval_samples_per_second": 46.647,
|
|
"eval_steps_per_second": 3.588,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 77.36,
|
|
"step": 1760,
|
|
"total_flos": 3.637414425770459e+18,
|
|
"train_loss": 0.22083052936941386,
|
|
"train_runtime": 1958.4327,
|
|
"train_samples_per_second": 59.027,
|
|
"train_steps_per_second": 0.899
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 1760,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 80,
|
|
"save_steps": 500,
|
|
"total_flos": 3.637414425770459e+18,
|
|
"train_batch_size": 16,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|