sidushdid's picture
Upload 9 files
e20387a verified
raw
history blame
45.8 kB
{
"best_metric": 0.9545454545454546,
"best_model_checkpoint": "ViT-base-16-224-7.5-1.5-1.5-split-lion-4\\checkpoint-943",
"epoch": 143.47826086956522,
"eval_steps": 500,
"global_step": 1650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.96,
"eval_accuracy": 0.7077922077922078,
"eval_loss": 0.7381948232650757,
"eval_runtime": 1.6508,
"eval_samples_per_second": 93.289,
"eval_steps_per_second": 6.058,
"step": 11
},
{
"epoch": 1.74,
"learning_rate": 1.5780272002695102e-05,
"loss": 0.8102,
"step": 20
},
{
"epoch": 2.0,
"eval_accuracy": 0.8181818181818182,
"eval_loss": 0.5202796459197998,
"eval_runtime": 1.6447,
"eval_samples_per_second": 93.636,
"eval_steps_per_second": 6.08,
"step": 23
},
{
"epoch": 2.96,
"eval_accuracy": 0.8701298701298701,
"eval_loss": 0.4076531231403351,
"eval_runtime": 1.6584,
"eval_samples_per_second": 92.862,
"eval_steps_per_second": 6.03,
"step": 34
},
{
"epoch": 3.48,
"learning_rate": 1.3309654930856552e-05,
"loss": 0.4016,
"step": 40
},
{
"epoch": 4.0,
"eval_accuracy": 0.8636363636363636,
"eval_loss": 0.3777826428413391,
"eval_runtime": 1.6734,
"eval_samples_per_second": 92.03,
"eval_steps_per_second": 5.976,
"step": 46
},
{
"epoch": 4.96,
"eval_accuracy": 0.8766233766233766,
"eval_loss": 0.35903552174568176,
"eval_runtime": 1.6866,
"eval_samples_per_second": 91.308,
"eval_steps_per_second": 5.929,
"step": 57
},
{
"epoch": 5.22,
"learning_rate": 9.780401480557754e-06,
"loss": 0.2052,
"step": 60
},
{
"epoch": 6.0,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.29355403780937195,
"eval_runtime": 1.7256,
"eval_samples_per_second": 89.245,
"eval_steps_per_second": 5.795,
"step": 69
},
{
"epoch": 6.96,
"learning_rate": 5.943306394074249e-06,
"loss": 0.0838,
"step": 80
},
{
"epoch": 6.96,
"eval_accuracy": 0.8961038961038961,
"eval_loss": 0.2710248529911041,
"eval_runtime": 1.6959,
"eval_samples_per_second": 90.805,
"eval_steps_per_second": 5.896,
"step": 80
},
{
"epoch": 8.0,
"eval_accuracy": 0.8961038961038961,
"eval_loss": 0.2775874435901642,
"eval_runtime": 1.6736,
"eval_samples_per_second": 92.018,
"eval_steps_per_second": 5.975,
"step": 92
},
{
"epoch": 8.7,
"learning_rate": 2.6146530177605546e-06,
"loss": 0.0407,
"step": 100
},
{
"epoch": 8.96,
"eval_accuracy": 0.9415584415584416,
"eval_loss": 0.22880351543426514,
"eval_runtime": 1.6916,
"eval_samples_per_second": 91.039,
"eval_steps_per_second": 5.912,
"step": 103
},
{
"epoch": 10.0,
"eval_accuracy": 0.9415584415584416,
"eval_loss": 0.2207733541727066,
"eval_runtime": 1.6896,
"eval_samples_per_second": 91.147,
"eval_steps_per_second": 5.919,
"step": 115
},
{
"epoch": 10.43,
"learning_rate": 5.025614934507641e-07,
"loss": 0.039,
"step": 120
},
{
"epoch": 10.96,
"eval_accuracy": 0.9415584415584416,
"eval_loss": 0.22476842999458313,
"eval_runtime": 1.7237,
"eval_samples_per_second": 89.342,
"eval_steps_per_second": 5.801,
"step": 126
},
{
"epoch": 12.0,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.3085295557975769,
"eval_runtime": 1.727,
"eval_samples_per_second": 89.173,
"eval_steps_per_second": 5.79,
"step": 138
},
{
"epoch": 12.17,
"learning_rate": 1.6610319647849526e-05,
"loss": 0.0324,
"step": 140
},
{
"epoch": 12.96,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.26784980297088623,
"eval_runtime": 1.7027,
"eval_samples_per_second": 90.446,
"eval_steps_per_second": 5.873,
"step": 149
},
{
"epoch": 13.91,
"learning_rate": 1.529573176177447e-05,
"loss": 0.022,
"step": 160
},
{
"epoch": 14.0,
"eval_accuracy": 0.948051948051948,
"eval_loss": 0.2529321610927582,
"eval_runtime": 1.7126,
"eval_samples_per_second": 89.923,
"eval_steps_per_second": 5.839,
"step": 161
},
{
"epoch": 14.96,
"eval_accuracy": 0.922077922077922,
"eval_loss": 0.24028430879116058,
"eval_runtime": 1.7176,
"eval_samples_per_second": 89.659,
"eval_steps_per_second": 5.822,
"step": 172
},
{
"epoch": 15.65,
"learning_rate": 1.25e-05,
"loss": 0.012,
"step": 180
},
{
"epoch": 16.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.3513343632221222,
"eval_runtime": 1.7476,
"eval_samples_per_second": 88.12,
"eval_steps_per_second": 5.722,
"step": 184
},
{
"epoch": 16.96,
"eval_accuracy": 0.935064935064935,
"eval_loss": 0.3014402389526367,
"eval_runtime": 1.7276,
"eval_samples_per_second": 89.139,
"eval_steps_per_second": 5.788,
"step": 195
},
{
"epoch": 17.39,
"learning_rate": 8.817873574253966e-06,
"loss": 0.0097,
"step": 200
},
{
"epoch": 18.0,
"eval_accuracy": 0.935064935064935,
"eval_loss": 0.31746622920036316,
"eval_runtime": 1.9143,
"eval_samples_per_second": 80.446,
"eval_steps_per_second": 5.224,
"step": 207
},
{
"epoch": 18.96,
"eval_accuracy": 0.935064935064935,
"eval_loss": 0.2747339904308319,
"eval_runtime": 1.7322,
"eval_samples_per_second": 88.904,
"eval_steps_per_second": 5.773,
"step": 218
},
{
"epoch": 19.13,
"learning_rate": 5.03266861634036e-06,
"loss": 0.0052,
"step": 220
},
{
"epoch": 20.0,
"eval_accuracy": 0.935064935064935,
"eval_loss": 0.2932997941970825,
"eval_runtime": 1.7167,
"eval_samples_per_second": 89.706,
"eval_steps_per_second": 5.825,
"step": 230
},
{
"epoch": 20.87,
"learning_rate": 1.9496296406751813e-06,
"loss": 0.009,
"step": 240
},
{
"epoch": 20.96,
"eval_accuracy": 0.9415584415584416,
"eval_loss": 0.28077924251556396,
"eval_runtime": 1.7005,
"eval_samples_per_second": 90.563,
"eval_steps_per_second": 5.881,
"step": 241
},
{
"epoch": 22.0,
"eval_accuracy": 0.935064935064935,
"eval_loss": 0.29576078057289124,
"eval_runtime": 1.7368,
"eval_samples_per_second": 88.67,
"eval_steps_per_second": 5.758,
"step": 253
},
{
"epoch": 22.61,
"learning_rate": 2.246260785014683e-07,
"loss": 0.0115,
"step": 260
},
{
"epoch": 22.96,
"eval_accuracy": 0.935064935064935,
"eval_loss": 0.2983975112438202,
"eval_runtime": 1.7116,
"eval_samples_per_second": 89.976,
"eval_steps_per_second": 5.843,
"step": 264
},
{
"epoch": 24.0,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.3521440327167511,
"eval_runtime": 1.7503,
"eval_samples_per_second": 87.986,
"eval_steps_per_second": 5.713,
"step": 276
},
{
"epoch": 24.35,
"learning_rate": 1.64420405881652e-05,
"loss": 0.0104,
"step": 280
},
{
"epoch": 24.96,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.4289417862892151,
"eval_runtime": 1.7466,
"eval_samples_per_second": 88.173,
"eval_steps_per_second": 5.726,
"step": 287
},
{
"epoch": 26.0,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.6231942176818848,
"eval_runtime": 1.7359,
"eval_samples_per_second": 88.715,
"eval_steps_per_second": 5.761,
"step": 299
},
{
"epoch": 26.09,
"learning_rate": 1.4717037025991483e-05,
"loss": 0.0086,
"step": 300
},
{
"epoch": 26.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.5161650776863098,
"eval_runtime": 1.7289,
"eval_samples_per_second": 89.075,
"eval_steps_per_second": 5.784,
"step": 310
},
{
"epoch": 27.83,
"learning_rate": 1.1633998050326307e-05,
"loss": 0.0205,
"step": 320
},
{
"epoch": 28.0,
"eval_accuracy": 0.8896103896103896,
"eval_loss": 0.6762561798095703,
"eval_runtime": 1.7546,
"eval_samples_per_second": 87.77,
"eval_steps_per_second": 5.699,
"step": 322
},
{
"epoch": 28.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.4664335250854492,
"eval_runtime": 1.7617,
"eval_samples_per_second": 87.414,
"eval_steps_per_second": 5.676,
"step": 333
},
{
"epoch": 29.57,
"learning_rate": 7.848793092412702e-06,
"loss": 0.012,
"step": 340
},
{
"epoch": 30.0,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.3841441869735718,
"eval_runtime": 1.7019,
"eval_samples_per_second": 90.487,
"eval_steps_per_second": 5.876,
"step": 345
},
{
"epoch": 30.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.42462781071662903,
"eval_runtime": 1.741,
"eval_samples_per_second": 88.455,
"eval_steps_per_second": 5.744,
"step": 356
},
{
"epoch": 31.3,
"learning_rate": 4.1666666666666686e-06,
"loss": 0.0061,
"step": 360
},
{
"epoch": 32.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.43087005615234375,
"eval_runtime": 1.7385,
"eval_samples_per_second": 88.582,
"eval_steps_per_second": 5.752,
"step": 368
},
{
"epoch": 32.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.45713886618614197,
"eval_runtime": 1.7098,
"eval_samples_per_second": 90.069,
"eval_steps_per_second": 5.849,
"step": 379
},
{
"epoch": 33.04,
"learning_rate": 1.3709349048921951e-06,
"loss": 0.0093,
"step": 380
},
{
"epoch": 34.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.48606640100479126,
"eval_runtime": 1.7668,
"eval_samples_per_second": 87.163,
"eval_steps_per_second": 5.66,
"step": 391
},
{
"epoch": 34.78,
"learning_rate": 5.634701881714148e-08,
"loss": 0.0101,
"step": 400
},
{
"epoch": 34.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.49100440740585327,
"eval_runtime": 1.7267,
"eval_samples_per_second": 89.186,
"eval_steps_per_second": 5.791,
"step": 402
},
{
"epoch": 36.0,
"eval_accuracy": 0.8961038961038961,
"eval_loss": 0.5978976488113403,
"eval_runtime": 1.7357,
"eval_samples_per_second": 88.725,
"eval_steps_per_second": 5.761,
"step": 414
},
{
"epoch": 36.52,
"learning_rate": 1.6164105173215904e-05,
"loss": 0.011,
"step": 420
},
{
"epoch": 36.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.3977736830711365,
"eval_runtime": 1.7968,
"eval_samples_per_second": 85.708,
"eval_steps_per_second": 5.565,
"step": 425
},
{
"epoch": 38.0,
"eval_accuracy": 0.935064935064935,
"eval_loss": 0.40591639280319214,
"eval_runtime": 1.7136,
"eval_samples_per_second": 89.87,
"eval_steps_per_second": 5.836,
"step": 437
},
{
"epoch": 38.26,
"learning_rate": 1.4052013648906114e-05,
"loss": 0.0226,
"step": 440
},
{
"epoch": 38.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.4942101240158081,
"eval_runtime": 1.7437,
"eval_samples_per_second": 88.318,
"eval_steps_per_second": 5.735,
"step": 448
},
{
"epoch": 40.0,
"learning_rate": 1.0723360272592418e-05,
"loss": 0.0118,
"step": 460
},
{
"epoch": 40.0,
"eval_accuracy": 0.8896103896103896,
"eval_loss": 0.7233626842498779,
"eval_runtime": 1.7317,
"eval_samples_per_second": 88.928,
"eval_steps_per_second": 5.775,
"step": 460
},
{
"epoch": 40.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.5826935172080994,
"eval_runtime": 1.7439,
"eval_samples_per_second": 88.31,
"eval_steps_per_second": 5.734,
"step": 471
},
{
"epoch": 41.74,
"learning_rate": 6.886265186108914e-06,
"loss": 0.011,
"step": 480
},
{
"epoch": 42.0,
"eval_accuracy": 0.8961038961038961,
"eval_loss": 0.662550687789917,
"eval_runtime": 1.7627,
"eval_samples_per_second": 87.366,
"eval_steps_per_second": 5.673,
"step": 483
},
{
"epoch": 42.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.5871102213859558,
"eval_runtime": 1.7486,
"eval_samples_per_second": 88.072,
"eval_steps_per_second": 5.719,
"step": 494
},
{
"epoch": 43.48,
"learning_rate": 3.3570117358101172e-06,
"loss": 0.0003,
"step": 500
},
{
"epoch": 44.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.5640321969985962,
"eval_runtime": 1.6975,
"eval_samples_per_second": 90.721,
"eval_steps_per_second": 5.891,
"step": 506
},
{
"epoch": 44.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.5655084848403931,
"eval_runtime": 1.7184,
"eval_samples_per_second": 89.619,
"eval_steps_per_second": 5.819,
"step": 517
},
{
"epoch": 45.22,
"learning_rate": 8.863946639715635e-07,
"loss": 0.0005,
"step": 520
},
{
"epoch": 46.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.5844298601150513,
"eval_runtime": 1.7344,
"eval_samples_per_second": 88.793,
"eval_steps_per_second": 5.766,
"step": 529
},
{
"epoch": 46.96,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0064,
"step": 540
},
{
"epoch": 46.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.5887525677680969,
"eval_runtime": 1.7094,
"eval_samples_per_second": 90.091,
"eval_steps_per_second": 5.85,
"step": 540
},
{
"epoch": 48.0,
"eval_accuracy": 0.8896103896103896,
"eval_loss": 0.6656709313392639,
"eval_runtime": 1.7414,
"eval_samples_per_second": 88.435,
"eval_steps_per_second": 5.743,
"step": 552
},
{
"epoch": 48.7,
"learning_rate": 1.5780272002695102e-05,
"loss": 0.0084,
"step": 560
},
{
"epoch": 48.96,
"eval_accuracy": 0.8961038961038961,
"eval_loss": 0.7156269550323486,
"eval_runtime": 1.7014,
"eval_samples_per_second": 90.514,
"eval_steps_per_second": 5.878,
"step": 563
},
{
"epoch": 50.0,
"eval_accuracy": 0.8766233766233766,
"eval_loss": 0.9346238970756531,
"eval_runtime": 1.7164,
"eval_samples_per_second": 89.724,
"eval_steps_per_second": 5.826,
"step": 575
},
{
"epoch": 50.43,
"learning_rate": 1.3309654930856552e-05,
"loss": 0.0318,
"step": 580
},
{
"epoch": 50.96,
"eval_accuracy": 0.8961038961038961,
"eval_loss": 0.8030693531036377,
"eval_runtime": 1.6994,
"eval_samples_per_second": 90.621,
"eval_steps_per_second": 5.884,
"step": 586
},
{
"epoch": 52.0,
"eval_accuracy": 0.8961038961038961,
"eval_loss": 0.5700052976608276,
"eval_runtime": 1.7294,
"eval_samples_per_second": 89.049,
"eval_steps_per_second": 5.782,
"step": 598
},
{
"epoch": 52.17,
"learning_rate": 9.780401480557754e-06,
"loss": 0.0338,
"step": 600
},
{
"epoch": 52.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.40834710001945496,
"eval_runtime": 1.6964,
"eval_samples_per_second": 90.781,
"eval_steps_per_second": 5.895,
"step": 609
},
{
"epoch": 53.91,
"learning_rate": 5.943306394074249e-06,
"loss": 0.0147,
"step": 620
},
{
"epoch": 54.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.41295498609542847,
"eval_runtime": 1.7144,
"eval_samples_per_second": 89.828,
"eval_steps_per_second": 5.833,
"step": 621
},
{
"epoch": 54.96,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.40372058749198914,
"eval_runtime": 1.7344,
"eval_samples_per_second": 88.792,
"eval_steps_per_second": 5.766,
"step": 632
},
{
"epoch": 55.65,
"learning_rate": 2.6146530177605546e-06,
"loss": 0.0011,
"step": 640
},
{
"epoch": 56.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.36978378891944885,
"eval_runtime": 1.7234,
"eval_samples_per_second": 89.359,
"eval_steps_per_second": 5.803,
"step": 644
},
{
"epoch": 56.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.38696253299713135,
"eval_runtime": 1.7014,
"eval_samples_per_second": 90.515,
"eval_steps_per_second": 5.878,
"step": 655
},
{
"epoch": 57.39,
"learning_rate": 5.025614934507641e-07,
"loss": 0.0021,
"step": 660
},
{
"epoch": 58.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.39924710988998413,
"eval_runtime": 1.6854,
"eval_samples_per_second": 91.374,
"eval_steps_per_second": 5.933,
"step": 667
},
{
"epoch": 58.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.44311344623565674,
"eval_runtime": 1.6854,
"eval_samples_per_second": 91.374,
"eval_steps_per_second": 5.933,
"step": 678
},
{
"epoch": 59.13,
"learning_rate": 1.6610319647849526e-05,
"loss": 0.0002,
"step": 680
},
{
"epoch": 60.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.44143199920654297,
"eval_runtime": 1.7504,
"eval_samples_per_second": 87.98,
"eval_steps_per_second": 5.713,
"step": 690
},
{
"epoch": 60.87,
"learning_rate": 1.529573176177447e-05,
"loss": 0.0088,
"step": 700
},
{
"epoch": 60.96,
"eval_accuracy": 0.9415584415584416,
"eval_loss": 0.4255146086215973,
"eval_runtime": 1.7144,
"eval_samples_per_second": 89.828,
"eval_steps_per_second": 5.833,
"step": 701
},
{
"epoch": 62.0,
"eval_accuracy": 0.922077922077922,
"eval_loss": 0.4168331027030945,
"eval_runtime": 1.7434,
"eval_samples_per_second": 88.334,
"eval_steps_per_second": 5.736,
"step": 713
},
{
"epoch": 62.61,
"learning_rate": 1.25e-05,
"loss": 0.0061,
"step": 720
},
{
"epoch": 62.96,
"eval_accuracy": 0.922077922077922,
"eval_loss": 0.49312305450439453,
"eval_runtime": 1.7334,
"eval_samples_per_second": 88.843,
"eval_steps_per_second": 5.769,
"step": 724
},
{
"epoch": 64.0,
"eval_accuracy": 0.948051948051948,
"eval_loss": 0.28522124886512756,
"eval_runtime": 1.7184,
"eval_samples_per_second": 89.619,
"eval_steps_per_second": 5.819,
"step": 736
},
{
"epoch": 64.35,
"learning_rate": 8.817873574253966e-06,
"loss": 0.0179,
"step": 740
},
{
"epoch": 64.96,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.44120827317237854,
"eval_runtime": 1.7364,
"eval_samples_per_second": 88.69,
"eval_steps_per_second": 5.759,
"step": 747
},
{
"epoch": 66.0,
"eval_accuracy": 0.935064935064935,
"eval_loss": 0.331409215927124,
"eval_runtime": 1.7124,
"eval_samples_per_second": 89.933,
"eval_steps_per_second": 5.84,
"step": 759
},
{
"epoch": 66.09,
"learning_rate": 5.03266861634036e-06,
"loss": 0.0014,
"step": 760
},
{
"epoch": 66.96,
"eval_accuracy": 0.9415584415584416,
"eval_loss": 0.2971489727497101,
"eval_runtime": 1.7494,
"eval_samples_per_second": 88.031,
"eval_steps_per_second": 5.716,
"step": 770
},
{
"epoch": 67.83,
"learning_rate": 1.9496296406751813e-06,
"loss": 0.0199,
"step": 780
},
{
"epoch": 68.0,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.3261590301990509,
"eval_runtime": 1.7504,
"eval_samples_per_second": 87.98,
"eval_steps_per_second": 5.713,
"step": 782
},
{
"epoch": 68.96,
"eval_accuracy": 0.935064935064935,
"eval_loss": 0.3835467994213104,
"eval_runtime": 1.7824,
"eval_samples_per_second": 86.401,
"eval_steps_per_second": 5.61,
"step": 793
},
{
"epoch": 69.57,
"learning_rate": 2.246260785014683e-07,
"loss": 0.0091,
"step": 800
},
{
"epoch": 70.0,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.3903743326663971,
"eval_runtime": 1.8425,
"eval_samples_per_second": 83.582,
"eval_steps_per_second": 5.427,
"step": 805
},
{
"epoch": 70.96,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.47730717062950134,
"eval_runtime": 1.8005,
"eval_samples_per_second": 85.531,
"eval_steps_per_second": 5.554,
"step": 816
},
{
"epoch": 71.3,
"learning_rate": 1.64420405881652e-05,
"loss": 0.0029,
"step": 820
},
{
"epoch": 72.0,
"eval_accuracy": 0.922077922077922,
"eval_loss": 0.4937627613544464,
"eval_runtime": 1.7875,
"eval_samples_per_second": 86.152,
"eval_steps_per_second": 5.594,
"step": 828
},
{
"epoch": 72.96,
"eval_accuracy": 0.948051948051948,
"eval_loss": 0.456775426864624,
"eval_runtime": 1.7718,
"eval_samples_per_second": 86.918,
"eval_steps_per_second": 5.644,
"step": 839
},
{
"epoch": 73.04,
"learning_rate": 1.4717037025991483e-05,
"loss": 0.0224,
"step": 840
},
{
"epoch": 74.0,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.4465040862560272,
"eval_runtime": 1.7516,
"eval_samples_per_second": 87.92,
"eval_steps_per_second": 5.709,
"step": 851
},
{
"epoch": 74.78,
"learning_rate": 1.1633998050326307e-05,
"loss": 0.0045,
"step": 860
},
{
"epoch": 74.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.6093705296516418,
"eval_runtime": 1.8028,
"eval_samples_per_second": 85.421,
"eval_steps_per_second": 5.547,
"step": 862
},
{
"epoch": 76.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.6924065947532654,
"eval_runtime": 1.7779,
"eval_samples_per_second": 86.617,
"eval_steps_per_second": 5.624,
"step": 874
},
{
"epoch": 76.52,
"learning_rate": 7.848793092412702e-06,
"loss": 0.0088,
"step": 880
},
{
"epoch": 76.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.48120445013046265,
"eval_runtime": 1.7635,
"eval_samples_per_second": 87.327,
"eval_steps_per_second": 5.671,
"step": 885
},
{
"epoch": 78.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.6367415189743042,
"eval_runtime": 1.7799,
"eval_samples_per_second": 86.522,
"eval_steps_per_second": 5.618,
"step": 897
},
{
"epoch": 78.26,
"learning_rate": 4.1666666666666686e-06,
"loss": 0.0033,
"step": 900
},
{
"epoch": 78.96,
"eval_accuracy": 0.922077922077922,
"eval_loss": 0.49070408940315247,
"eval_runtime": 1.7676,
"eval_samples_per_second": 87.125,
"eval_steps_per_second": 5.657,
"step": 908
},
{
"epoch": 80.0,
"learning_rate": 1.3709349048921951e-06,
"loss": 0.0076,
"step": 920
},
{
"epoch": 80.0,
"eval_accuracy": 0.9415584415584416,
"eval_loss": 0.31148040294647217,
"eval_runtime": 1.7496,
"eval_samples_per_second": 88.021,
"eval_steps_per_second": 5.716,
"step": 920
},
{
"epoch": 80.96,
"eval_accuracy": 0.948051948051948,
"eval_loss": 0.2700955271720886,
"eval_runtime": 1.7827,
"eval_samples_per_second": 86.387,
"eval_steps_per_second": 5.61,
"step": 931
},
{
"epoch": 81.74,
"learning_rate": 5.634701881714148e-08,
"loss": 0.0002,
"step": 940
},
{
"epoch": 82.0,
"eval_accuracy": 0.9545454545454546,
"eval_loss": 0.2613329589366913,
"eval_runtime": 1.7597,
"eval_samples_per_second": 87.515,
"eval_steps_per_second": 5.683,
"step": 943
},
{
"epoch": 82.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.40443289279937744,
"eval_runtime": 1.7637,
"eval_samples_per_second": 87.314,
"eval_steps_per_second": 5.67,
"step": 954
},
{
"epoch": 83.48,
"learning_rate": 1.6164105173215904e-05,
"loss": 0.0193,
"step": 960
},
{
"epoch": 84.0,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.9613493084907532,
"eval_runtime": 1.7854,
"eval_samples_per_second": 86.255,
"eval_steps_per_second": 5.601,
"step": 966
},
{
"epoch": 84.96,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.6934040784835815,
"eval_runtime": 1.7617,
"eval_samples_per_second": 87.414,
"eval_steps_per_second": 5.676,
"step": 977
},
{
"epoch": 85.22,
"learning_rate": 1.4052013648906114e-05,
"loss": 0.0238,
"step": 980
},
{
"epoch": 86.0,
"eval_accuracy": 0.8896103896103896,
"eval_loss": 0.9348794221878052,
"eval_runtime": 1.7915,
"eval_samples_per_second": 85.962,
"eval_steps_per_second": 5.582,
"step": 989
},
{
"epoch": 86.96,
"learning_rate": 1.0723360272592418e-05,
"loss": 0.011,
"step": 1000
},
{
"epoch": 86.96,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.8836289644241333,
"eval_runtime": 1.8098,
"eval_samples_per_second": 85.095,
"eval_steps_per_second": 5.526,
"step": 1000
},
{
"epoch": 88.0,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.7403988838195801,
"eval_runtime": 1.7608,
"eval_samples_per_second": 87.459,
"eval_steps_per_second": 5.679,
"step": 1012
},
{
"epoch": 88.7,
"learning_rate": 6.886265186108914e-06,
"loss": 0.018,
"step": 1020
},
{
"epoch": 88.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.5259799957275391,
"eval_runtime": 1.7749,
"eval_samples_per_second": 86.764,
"eval_steps_per_second": 5.634,
"step": 1023
},
{
"epoch": 90.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.5202356576919556,
"eval_runtime": 1.7868,
"eval_samples_per_second": 86.187,
"eval_steps_per_second": 5.597,
"step": 1035
},
{
"epoch": 90.43,
"learning_rate": 3.3570117358101172e-06,
"loss": 0.0041,
"step": 1040
},
{
"epoch": 90.96,
"eval_accuracy": 0.922077922077922,
"eval_loss": 0.48472902178764343,
"eval_runtime": 1.7845,
"eval_samples_per_second": 86.297,
"eval_steps_per_second": 5.604,
"step": 1046
},
{
"epoch": 92.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.48678579926490784,
"eval_runtime": 1.7317,
"eval_samples_per_second": 88.93,
"eval_steps_per_second": 5.775,
"step": 1058
},
{
"epoch": 92.17,
"learning_rate": 8.863946639715635e-07,
"loss": 0.001,
"step": 1060
},
{
"epoch": 92.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.4975196421146393,
"eval_runtime": 1.7866,
"eval_samples_per_second": 86.198,
"eval_steps_per_second": 5.597,
"step": 1069
},
{
"epoch": 93.91,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0014,
"step": 1080
},
{
"epoch": 94.0,
"eval_accuracy": 0.922077922077922,
"eval_loss": 0.6255179643630981,
"eval_runtime": 1.7777,
"eval_samples_per_second": 86.631,
"eval_steps_per_second": 5.625,
"step": 1081
},
{
"epoch": 94.96,
"eval_accuracy": 0.8766233766233766,
"eval_loss": 0.9968315362930298,
"eval_runtime": 1.7619,
"eval_samples_per_second": 87.408,
"eval_steps_per_second": 5.676,
"step": 1092
},
{
"epoch": 95.65,
"learning_rate": 1.5780272002695102e-05,
"loss": 0.0165,
"step": 1100
},
{
"epoch": 96.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.6173205971717834,
"eval_runtime": 1.8105,
"eval_samples_per_second": 85.06,
"eval_steps_per_second": 5.523,
"step": 1104
},
{
"epoch": 96.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.7537987232208252,
"eval_runtime": 1.7727,
"eval_samples_per_second": 86.873,
"eval_steps_per_second": 5.641,
"step": 1115
},
{
"epoch": 97.39,
"learning_rate": 1.3309654930856552e-05,
"loss": 0.013,
"step": 1120
},
{
"epoch": 98.0,
"eval_accuracy": 0.8766233766233766,
"eval_loss": 0.7560279965400696,
"eval_runtime": 1.7715,
"eval_samples_per_second": 86.931,
"eval_steps_per_second": 5.645,
"step": 1127
},
{
"epoch": 98.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.5807818174362183,
"eval_runtime": 1.79,
"eval_samples_per_second": 86.032,
"eval_steps_per_second": 5.586,
"step": 1138
},
{
"epoch": 99.13,
"learning_rate": 9.780401480557754e-06,
"loss": 0.0237,
"step": 1140
},
{
"epoch": 100.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.5147651433944702,
"eval_runtime": 1.7527,
"eval_samples_per_second": 87.864,
"eval_steps_per_second": 5.705,
"step": 1150
},
{
"epoch": 100.87,
"learning_rate": 5.943306394074249e-06,
"loss": 0.0061,
"step": 1160
},
{
"epoch": 100.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.5672047734260559,
"eval_runtime": 1.7885,
"eval_samples_per_second": 86.106,
"eval_steps_per_second": 5.591,
"step": 1161
},
{
"epoch": 102.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.4343276619911194,
"eval_runtime": 1.7759,
"eval_samples_per_second": 86.714,
"eval_steps_per_second": 5.631,
"step": 1173
},
{
"epoch": 102.61,
"learning_rate": 2.6146530177605546e-06,
"loss": 0.002,
"step": 1180
},
{
"epoch": 102.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.32392024993896484,
"eval_runtime": 1.8027,
"eval_samples_per_second": 85.425,
"eval_steps_per_second": 5.547,
"step": 1184
},
{
"epoch": 104.0,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.2951604127883911,
"eval_runtime": 1.8006,
"eval_samples_per_second": 85.525,
"eval_steps_per_second": 5.554,
"step": 1196
},
{
"epoch": 104.35,
"learning_rate": 5.025614934507641e-07,
"loss": 0.0005,
"step": 1200
},
{
"epoch": 104.96,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.2926579713821411,
"eval_runtime": 1.7666,
"eval_samples_per_second": 87.174,
"eval_steps_per_second": 5.661,
"step": 1207
},
{
"epoch": 106.0,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.3511568605899811,
"eval_runtime": 1.7794,
"eval_samples_per_second": 86.546,
"eval_steps_per_second": 5.62,
"step": 1219
},
{
"epoch": 106.09,
"learning_rate": 1.6610319647849526e-05,
"loss": 0.0003,
"step": 1220
},
{
"epoch": 106.96,
"eval_accuracy": 0.922077922077922,
"eval_loss": 0.4030219316482544,
"eval_runtime": 1.7918,
"eval_samples_per_second": 85.945,
"eval_steps_per_second": 5.581,
"step": 1230
},
{
"epoch": 107.83,
"learning_rate": 1.529573176177447e-05,
"loss": 0.0023,
"step": 1240
},
{
"epoch": 108.0,
"eval_accuracy": 0.8896103896103896,
"eval_loss": 0.3911021053791046,
"eval_runtime": 1.7635,
"eval_samples_per_second": 87.328,
"eval_steps_per_second": 5.671,
"step": 1242
},
{
"epoch": 108.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.5156851410865784,
"eval_runtime": 1.7917,
"eval_samples_per_second": 85.953,
"eval_steps_per_second": 5.581,
"step": 1253
},
{
"epoch": 109.57,
"learning_rate": 1.25e-05,
"loss": 0.0114,
"step": 1260
},
{
"epoch": 110.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.5531629323959351,
"eval_runtime": 1.7244,
"eval_samples_per_second": 89.307,
"eval_steps_per_second": 5.799,
"step": 1265
},
{
"epoch": 110.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.6447522640228271,
"eval_runtime": 1.7304,
"eval_samples_per_second": 88.997,
"eval_steps_per_second": 5.779,
"step": 1276
},
{
"epoch": 111.3,
"learning_rate": 8.817873574253966e-06,
"loss": 0.0003,
"step": 1280
},
{
"epoch": 112.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.6866676807403564,
"eval_runtime": 1.7494,
"eval_samples_per_second": 88.031,
"eval_steps_per_second": 5.716,
"step": 1288
},
{
"epoch": 112.96,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.7411206960678101,
"eval_runtime": 1.7444,
"eval_samples_per_second": 88.283,
"eval_steps_per_second": 5.733,
"step": 1299
},
{
"epoch": 113.04,
"learning_rate": 5.03266861634036e-06,
"loss": 0.0153,
"step": 1300
},
{
"epoch": 114.0,
"eval_accuracy": 0.8896103896103896,
"eval_loss": 0.7488161325454712,
"eval_runtime": 1.7644,
"eval_samples_per_second": 87.282,
"eval_steps_per_second": 5.668,
"step": 1311
},
{
"epoch": 114.78,
"learning_rate": 1.9496296406751813e-06,
"loss": 0.0039,
"step": 1320
},
{
"epoch": 114.96,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.6945769786834717,
"eval_runtime": 1.7344,
"eval_samples_per_second": 88.792,
"eval_steps_per_second": 5.766,
"step": 1322
},
{
"epoch": 116.0,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.7218338847160339,
"eval_runtime": 1.7644,
"eval_samples_per_second": 87.281,
"eval_steps_per_second": 5.668,
"step": 1334
},
{
"epoch": 116.52,
"learning_rate": 2.246260785014683e-07,
"loss": 0.0002,
"step": 1340
},
{
"epoch": 116.96,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.7305352091789246,
"eval_runtime": 1.7634,
"eval_samples_per_second": 87.332,
"eval_steps_per_second": 5.671,
"step": 1345
},
{
"epoch": 118.0,
"eval_accuracy": 0.8701298701298701,
"eval_loss": 1.0061231851577759,
"eval_runtime": 1.7854,
"eval_samples_per_second": 86.255,
"eval_steps_per_second": 5.601,
"step": 1357
},
{
"epoch": 118.26,
"learning_rate": 1.64420405881652e-05,
"loss": 0.0066,
"step": 1360
},
{
"epoch": 118.96,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.5966177582740784,
"eval_runtime": 1.7834,
"eval_samples_per_second": 86.352,
"eval_steps_per_second": 5.607,
"step": 1368
},
{
"epoch": 120.0,
"learning_rate": 1.4717037025991483e-05,
"loss": 0.0083,
"step": 1380
},
{
"epoch": 120.0,
"eval_accuracy": 0.8636363636363636,
"eval_loss": 1.1087840795516968,
"eval_runtime": 1.7834,
"eval_samples_per_second": 86.352,
"eval_steps_per_second": 5.607,
"step": 1380
},
{
"epoch": 120.96,
"eval_accuracy": 0.8831168831168831,
"eval_loss": 0.821342945098877,
"eval_runtime": 1.7534,
"eval_samples_per_second": 87.83,
"eval_steps_per_second": 5.703,
"step": 1391
},
{
"epoch": 121.74,
"learning_rate": 1.1633998050326307e-05,
"loss": 0.0202,
"step": 1400
},
{
"epoch": 122.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.5226480960845947,
"eval_runtime": 1.7954,
"eval_samples_per_second": 85.775,
"eval_steps_per_second": 5.57,
"step": 1403
},
{
"epoch": 122.96,
"eval_accuracy": 0.922077922077922,
"eval_loss": 0.44853323698043823,
"eval_runtime": 1.7774,
"eval_samples_per_second": 86.643,
"eval_steps_per_second": 5.626,
"step": 1414
},
{
"epoch": 123.48,
"learning_rate": 7.848793092412702e-06,
"loss": 0.0033,
"step": 1420
},
{
"epoch": 124.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.7281427979469299,
"eval_runtime": 1.8004,
"eval_samples_per_second": 85.537,
"eval_steps_per_second": 5.554,
"step": 1426
},
{
"epoch": 124.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.6655176281929016,
"eval_runtime": 1.7664,
"eval_samples_per_second": 87.183,
"eval_steps_per_second": 5.661,
"step": 1437
},
{
"epoch": 125.22,
"learning_rate": 4.1666666666666686e-06,
"loss": 0.0185,
"step": 1440
},
{
"epoch": 126.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.5161400437355042,
"eval_runtime": 1.7704,
"eval_samples_per_second": 86.986,
"eval_steps_per_second": 5.648,
"step": 1449
},
{
"epoch": 126.96,
"learning_rate": 1.3709349048921951e-06,
"loss": 0.0001,
"step": 1460
},
{
"epoch": 126.96,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.4859886169433594,
"eval_runtime": 1.7874,
"eval_samples_per_second": 86.159,
"eval_steps_per_second": 5.595,
"step": 1460
},
{
"epoch": 128.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.4834165573120117,
"eval_runtime": 1.7944,
"eval_samples_per_second": 85.822,
"eval_steps_per_second": 5.573,
"step": 1472
},
{
"epoch": 128.7,
"learning_rate": 5.634701881714148e-08,
"loss": 0.0047,
"step": 1480
},
{
"epoch": 128.96,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.48360273241996765,
"eval_runtime": 1.7574,
"eval_samples_per_second": 87.63,
"eval_steps_per_second": 5.69,
"step": 1483
},
{
"epoch": 130.0,
"eval_accuracy": 0.9155844155844156,
"eval_loss": 0.6164301037788391,
"eval_runtime": 1.7394,
"eval_samples_per_second": 88.537,
"eval_steps_per_second": 5.749,
"step": 1495
},
{
"epoch": 130.43,
"learning_rate": 1.6164105173215904e-05,
"loss": 0.011,
"step": 1500
},
{
"epoch": 130.96,
"eval_accuracy": 0.8961038961038961,
"eval_loss": 0.7818012237548828,
"eval_runtime": 1.7684,
"eval_samples_per_second": 87.085,
"eval_steps_per_second": 5.655,
"step": 1506
},
{
"epoch": 132.0,
"eval_accuracy": 0.8636363636363636,
"eval_loss": 0.8022345900535583,
"eval_runtime": 1.7544,
"eval_samples_per_second": 87.779,
"eval_steps_per_second": 5.7,
"step": 1518
},
{
"epoch": 132.17,
"learning_rate": 1.4052013648906114e-05,
"loss": 0.0023,
"step": 1520
},
{
"epoch": 132.96,
"eval_accuracy": 0.8636363636363636,
"eval_loss": 0.8653693199157715,
"eval_runtime": 1.7604,
"eval_samples_per_second": 87.48,
"eval_steps_per_second": 5.681,
"step": 1529
},
{
"epoch": 133.91,
"learning_rate": 1.0723360272592418e-05,
"loss": 0.0222,
"step": 1540
},
{
"epoch": 134.0,
"eval_accuracy": 0.8896103896103896,
"eval_loss": 0.6707515716552734,
"eval_runtime": 1.7434,
"eval_samples_per_second": 88.334,
"eval_steps_per_second": 5.736,
"step": 1541
},
{
"epoch": 134.96,
"eval_accuracy": 0.935064935064935,
"eval_loss": 0.4996984004974365,
"eval_runtime": 1.7534,
"eval_samples_per_second": 87.83,
"eval_steps_per_second": 5.703,
"step": 1552
},
{
"epoch": 135.65,
"learning_rate": 6.886265186108914e-06,
"loss": 0.0126,
"step": 1560
},
{
"epoch": 136.0,
"eval_accuracy": 0.922077922077922,
"eval_loss": 0.5560286641120911,
"eval_runtime": 1.7314,
"eval_samples_per_second": 88.946,
"eval_steps_per_second": 5.776,
"step": 1564
},
{
"epoch": 136.96,
"eval_accuracy": 0.8961038961038961,
"eval_loss": 0.6162758469581604,
"eval_runtime": 1.7204,
"eval_samples_per_second": 89.515,
"eval_steps_per_second": 5.813,
"step": 1575
},
{
"epoch": 137.39,
"learning_rate": 3.3570117358101172e-06,
"loss": 0.014,
"step": 1580
},
{
"epoch": 138.0,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.6488694548606873,
"eval_runtime": 1.7194,
"eval_samples_per_second": 89.567,
"eval_steps_per_second": 5.816,
"step": 1587
},
{
"epoch": 138.96,
"eval_accuracy": 0.8701298701298701,
"eval_loss": 0.684516191482544,
"eval_runtime": 1.7304,
"eval_samples_per_second": 88.997,
"eval_steps_per_second": 5.779,
"step": 1598
},
{
"epoch": 139.13,
"learning_rate": 8.863946639715635e-07,
"loss": 0.0088,
"step": 1600
},
{
"epoch": 140.0,
"eval_accuracy": 0.8766233766233766,
"eval_loss": 0.7022619247436523,
"eval_runtime": 1.7374,
"eval_samples_per_second": 88.639,
"eval_steps_per_second": 5.756,
"step": 1610
},
{
"epoch": 140.87,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0022,
"step": 1620
},
{
"epoch": 140.96,
"eval_accuracy": 0.8701298701298701,
"eval_loss": 0.762221097946167,
"eval_runtime": 1.6984,
"eval_samples_per_second": 90.675,
"eval_steps_per_second": 5.888,
"step": 1621
},
{
"epoch": 142.0,
"eval_accuracy": 0.8961038961038961,
"eval_loss": 0.6736029982566833,
"eval_runtime": 1.7594,
"eval_samples_per_second": 87.53,
"eval_steps_per_second": 5.684,
"step": 1633
},
{
"epoch": 142.61,
"learning_rate": 1.5780272002695102e-05,
"loss": 0.0017,
"step": 1640
},
{
"epoch": 142.96,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.5298991799354553,
"eval_runtime": 1.6954,
"eval_samples_per_second": 90.835,
"eval_steps_per_second": 5.898,
"step": 1644
},
{
"epoch": 143.48,
"eval_accuracy": 0.9025974025974026,
"eval_loss": 0.5584802031517029,
"eval_runtime": 1.6964,
"eval_samples_per_second": 90.782,
"eval_steps_per_second": 5.895,
"step": 1650
},
{
"epoch": 143.48,
"step": 1650,
"total_flos": 8.017005638819359e+18,
"train_loss": 0.028458750352940775,
"train_runtime": 2286.6106,
"train_samples_per_second": 47.297,
"train_steps_per_second": 0.722
}
],
"logging_steps": 20,
"max_steps": 1650,
"num_train_epochs": 150,
"save_steps": 500,
"total_flos": 8.017005638819359e+18,
"trial_name": null,
"trial_params": null
}