DiegoBraz's picture
End of training
fc4005a verified
{
"best_metric": 0.8727272727272727,
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-350",
"epoch": 45.16129032258065,
"eval_steps": 500,
"global_step": 350,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9032258064516129,
"eval_accuracy": 0.2,
"eval_loss": 2.3726940155029297,
"eval_runtime": 2.5133,
"eval_samples_per_second": 43.767,
"eval_steps_per_second": 1.592,
"step": 7
},
{
"epoch": 1.2903225806451613,
"grad_norm": 6.954860687255859,
"learning_rate": 1.4285714285714285e-05,
"loss": 2.3966,
"step": 10
},
{
"epoch": 1.935483870967742,
"eval_accuracy": 0.3181818181818182,
"eval_loss": 2.2909836769104004,
"eval_runtime": 2.5816,
"eval_samples_per_second": 42.609,
"eval_steps_per_second": 1.549,
"step": 15
},
{
"epoch": 2.5806451612903225,
"grad_norm": 5.919530391693115,
"learning_rate": 2.857142857142857e-05,
"loss": 2.3131,
"step": 20
},
{
"epoch": 2.967741935483871,
"eval_accuracy": 0.4090909090909091,
"eval_loss": 2.1218321323394775,
"eval_runtime": 2.4729,
"eval_samples_per_second": 44.483,
"eval_steps_per_second": 1.618,
"step": 23
},
{
"epoch": 3.870967741935484,
"grad_norm": 8.89976692199707,
"learning_rate": 4.2857142857142856e-05,
"loss": 2.072,
"step": 30
},
{
"epoch": 4.0,
"eval_accuracy": 0.45454545454545453,
"eval_loss": 1.8349428176879883,
"eval_runtime": 2.5693,
"eval_samples_per_second": 42.813,
"eval_steps_per_second": 1.557,
"step": 31
},
{
"epoch": 4.903225806451613,
"eval_accuracy": 0.5363636363636364,
"eval_loss": 1.463451862335205,
"eval_runtime": 2.638,
"eval_samples_per_second": 41.698,
"eval_steps_per_second": 1.516,
"step": 38
},
{
"epoch": 5.161290322580645,
"grad_norm": 12.236054420471191,
"learning_rate": 4.9206349206349204e-05,
"loss": 1.5528,
"step": 40
},
{
"epoch": 5.935483870967742,
"eval_accuracy": 0.6636363636363637,
"eval_loss": 1.103641390800476,
"eval_runtime": 2.4384,
"eval_samples_per_second": 45.111,
"eval_steps_per_second": 1.64,
"step": 46
},
{
"epoch": 6.451612903225806,
"grad_norm": 10.842184066772461,
"learning_rate": 4.761904761904762e-05,
"loss": 1.0472,
"step": 50
},
{
"epoch": 6.967741935483871,
"eval_accuracy": 0.7272727272727273,
"eval_loss": 0.9272710680961609,
"eval_runtime": 3.0352,
"eval_samples_per_second": 36.241,
"eval_steps_per_second": 1.318,
"step": 54
},
{
"epoch": 7.741935483870968,
"grad_norm": 10.389890670776367,
"learning_rate": 4.603174603174603e-05,
"loss": 0.7989,
"step": 60
},
{
"epoch": 8.0,
"eval_accuracy": 0.7909090909090909,
"eval_loss": 0.8008114695549011,
"eval_runtime": 2.4733,
"eval_samples_per_second": 44.475,
"eval_steps_per_second": 1.617,
"step": 62
},
{
"epoch": 8.903225806451612,
"eval_accuracy": 0.7818181818181819,
"eval_loss": 0.7358732223510742,
"eval_runtime": 2.439,
"eval_samples_per_second": 45.101,
"eval_steps_per_second": 1.64,
"step": 69
},
{
"epoch": 9.03225806451613,
"grad_norm": 8.742050170898438,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.604,
"step": 70
},
{
"epoch": 9.935483870967742,
"eval_accuracy": 0.7909090909090909,
"eval_loss": 0.7282811403274536,
"eval_runtime": 3.311,
"eval_samples_per_second": 33.223,
"eval_steps_per_second": 1.208,
"step": 77
},
{
"epoch": 10.32258064516129,
"grad_norm": 11.235774993896484,
"learning_rate": 4.2857142857142856e-05,
"loss": 0.5228,
"step": 80
},
{
"epoch": 10.967741935483872,
"eval_accuracy": 0.8363636363636363,
"eval_loss": 0.5896822214126587,
"eval_runtime": 2.4309,
"eval_samples_per_second": 45.251,
"eval_steps_per_second": 1.646,
"step": 85
},
{
"epoch": 11.612903225806452,
"grad_norm": 10.12472915649414,
"learning_rate": 4.126984126984127e-05,
"loss": 0.4734,
"step": 90
},
{
"epoch": 12.0,
"eval_accuracy": 0.8181818181818182,
"eval_loss": 0.6503061652183533,
"eval_runtime": 2.482,
"eval_samples_per_second": 44.32,
"eval_steps_per_second": 1.612,
"step": 93
},
{
"epoch": 12.903225806451612,
"grad_norm": 5.467583179473877,
"learning_rate": 3.968253968253968e-05,
"loss": 0.3987,
"step": 100
},
{
"epoch": 12.903225806451612,
"eval_accuracy": 0.8272727272727273,
"eval_loss": 0.5785130858421326,
"eval_runtime": 2.5549,
"eval_samples_per_second": 43.054,
"eval_steps_per_second": 1.566,
"step": 100
},
{
"epoch": 13.935483870967742,
"eval_accuracy": 0.8181818181818182,
"eval_loss": 0.6091312766075134,
"eval_runtime": 2.4459,
"eval_samples_per_second": 44.973,
"eval_steps_per_second": 1.635,
"step": 108
},
{
"epoch": 14.193548387096774,
"grad_norm": 8.375397682189941,
"learning_rate": 3.809523809523809e-05,
"loss": 0.3742,
"step": 110
},
{
"epoch": 14.967741935483872,
"eval_accuracy": 0.8454545454545455,
"eval_loss": 0.5278283953666687,
"eval_runtime": 3.0723,
"eval_samples_per_second": 35.804,
"eval_steps_per_second": 1.302,
"step": 116
},
{
"epoch": 15.483870967741936,
"grad_norm": 9.191360473632812,
"learning_rate": 3.650793650793651e-05,
"loss": 0.3588,
"step": 120
},
{
"epoch": 16.0,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.5279448628425598,
"eval_runtime": 2.7269,
"eval_samples_per_second": 40.339,
"eval_steps_per_second": 1.467,
"step": 124
},
{
"epoch": 16.774193548387096,
"grad_norm": 10.030477523803711,
"learning_rate": 3.492063492063492e-05,
"loss": 0.3536,
"step": 130
},
{
"epoch": 16.903225806451612,
"eval_accuracy": 0.8363636363636363,
"eval_loss": 0.5189336538314819,
"eval_runtime": 2.4748,
"eval_samples_per_second": 44.447,
"eval_steps_per_second": 1.616,
"step": 131
},
{
"epoch": 17.93548387096774,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.5036130547523499,
"eval_runtime": 3.2666,
"eval_samples_per_second": 33.674,
"eval_steps_per_second": 1.225,
"step": 139
},
{
"epoch": 18.06451612903226,
"grad_norm": 5.293987274169922,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.331,
"step": 140
},
{
"epoch": 18.967741935483872,
"eval_accuracy": 0.8363636363636363,
"eval_loss": 0.5327084064483643,
"eval_runtime": 2.49,
"eval_samples_per_second": 44.178,
"eval_steps_per_second": 1.606,
"step": 147
},
{
"epoch": 19.35483870967742,
"grad_norm": 4.783825397491455,
"learning_rate": 3.1746031746031745e-05,
"loss": 0.2836,
"step": 150
},
{
"epoch": 20.0,
"eval_accuracy": 0.8636363636363636,
"eval_loss": 0.47167953848838806,
"eval_runtime": 2.4734,
"eval_samples_per_second": 44.474,
"eval_steps_per_second": 1.617,
"step": 155
},
{
"epoch": 20.64516129032258,
"grad_norm": 6.62587308883667,
"learning_rate": 3.0158730158730158e-05,
"loss": 0.2785,
"step": 160
},
{
"epoch": 20.903225806451612,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.4598047435283661,
"eval_runtime": 2.8824,
"eval_samples_per_second": 38.162,
"eval_steps_per_second": 1.388,
"step": 162
},
{
"epoch": 21.93548387096774,
"grad_norm": 8.891733169555664,
"learning_rate": 2.857142857142857e-05,
"loss": 0.2439,
"step": 170
},
{
"epoch": 21.93548387096774,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.4782707691192627,
"eval_runtime": 2.4502,
"eval_samples_per_second": 44.894,
"eval_steps_per_second": 1.632,
"step": 170
},
{
"epoch": 22.967741935483872,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.4948057234287262,
"eval_runtime": 2.6324,
"eval_samples_per_second": 41.787,
"eval_steps_per_second": 1.52,
"step": 178
},
{
"epoch": 23.225806451612904,
"grad_norm": 8.404036521911621,
"learning_rate": 2.6984126984126984e-05,
"loss": 0.2779,
"step": 180
},
{
"epoch": 24.0,
"eval_accuracy": 0.8454545454545455,
"eval_loss": 0.48835644125938416,
"eval_runtime": 2.483,
"eval_samples_per_second": 44.301,
"eval_steps_per_second": 1.611,
"step": 186
},
{
"epoch": 24.516129032258064,
"grad_norm": 3.865527629852295,
"learning_rate": 2.5396825396825397e-05,
"loss": 0.2167,
"step": 190
},
{
"epoch": 24.903225806451612,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.5084207057952881,
"eval_runtime": 2.4768,
"eval_samples_per_second": 44.413,
"eval_steps_per_second": 1.615,
"step": 193
},
{
"epoch": 25.806451612903224,
"grad_norm": 4.576181888580322,
"learning_rate": 2.380952380952381e-05,
"loss": 0.2164,
"step": 200
},
{
"epoch": 25.93548387096774,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.471531480550766,
"eval_runtime": 3.2991,
"eval_samples_per_second": 33.342,
"eval_steps_per_second": 1.212,
"step": 201
},
{
"epoch": 26.967741935483872,
"eval_accuracy": 0.8272727272727273,
"eval_loss": 0.5503013730049133,
"eval_runtime": 2.5348,
"eval_samples_per_second": 43.396,
"eval_steps_per_second": 1.578,
"step": 209
},
{
"epoch": 27.096774193548388,
"grad_norm": 9.690849304199219,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.2342,
"step": 210
},
{
"epoch": 28.0,
"eval_accuracy": 0.8272727272727273,
"eval_loss": 0.49801039695739746,
"eval_runtime": 2.4694,
"eval_samples_per_second": 44.545,
"eval_steps_per_second": 1.62,
"step": 217
},
{
"epoch": 28.387096774193548,
"grad_norm": 6.414791107177734,
"learning_rate": 2.0634920634920636e-05,
"loss": 0.216,
"step": 220
},
{
"epoch": 28.903225806451612,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.4240585267543793,
"eval_runtime": 2.5761,
"eval_samples_per_second": 42.7,
"eval_steps_per_second": 1.553,
"step": 224
},
{
"epoch": 29.677419354838708,
"grad_norm": 7.521275043487549,
"learning_rate": 1.9047619047619046e-05,
"loss": 0.1986,
"step": 230
},
{
"epoch": 29.93548387096774,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.4466007649898529,
"eval_runtime": 2.468,
"eval_samples_per_second": 44.571,
"eval_steps_per_second": 1.621,
"step": 232
},
{
"epoch": 30.967741935483872,
"grad_norm": 4.934516429901123,
"learning_rate": 1.746031746031746e-05,
"loss": 0.1919,
"step": 240
},
{
"epoch": 30.967741935483872,
"eval_accuracy": 0.8636363636363636,
"eval_loss": 0.4557681381702423,
"eval_runtime": 2.9585,
"eval_samples_per_second": 37.181,
"eval_steps_per_second": 1.352,
"step": 240
},
{
"epoch": 32.0,
"eval_accuracy": 0.8636363636363636,
"eval_loss": 0.43898770213127136,
"eval_runtime": 2.4694,
"eval_samples_per_second": 44.544,
"eval_steps_per_second": 1.62,
"step": 248
},
{
"epoch": 32.25806451612903,
"grad_norm": 8.577577590942383,
"learning_rate": 1.5873015873015872e-05,
"loss": 0.1958,
"step": 250
},
{
"epoch": 32.903225806451616,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.43786120414733887,
"eval_runtime": 2.4873,
"eval_samples_per_second": 44.224,
"eval_steps_per_second": 1.608,
"step": 255
},
{
"epoch": 33.54838709677419,
"grad_norm": 6.008741855621338,
"learning_rate": 1.4285714285714285e-05,
"loss": 0.1693,
"step": 260
},
{
"epoch": 33.935483870967744,
"eval_accuracy": 0.8454545454545455,
"eval_loss": 0.442380428314209,
"eval_runtime": 3.2712,
"eval_samples_per_second": 33.627,
"eval_steps_per_second": 1.223,
"step": 263
},
{
"epoch": 34.83870967741935,
"grad_norm": 7.021655559539795,
"learning_rate": 1.2698412698412699e-05,
"loss": 0.2158,
"step": 270
},
{
"epoch": 34.96774193548387,
"eval_accuracy": 0.8363636363636363,
"eval_loss": 0.45243895053863525,
"eval_runtime": 2.4917,
"eval_samples_per_second": 44.146,
"eval_steps_per_second": 1.605,
"step": 271
},
{
"epoch": 36.0,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.4387998878955841,
"eval_runtime": 2.6925,
"eval_samples_per_second": 40.854,
"eval_steps_per_second": 1.486,
"step": 279
},
{
"epoch": 36.12903225806452,
"grad_norm": 7.29483699798584,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.1578,
"step": 280
},
{
"epoch": 36.903225806451616,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.4327390491962433,
"eval_runtime": 2.5946,
"eval_samples_per_second": 42.395,
"eval_steps_per_second": 1.542,
"step": 286
},
{
"epoch": 37.41935483870968,
"grad_norm": 4.809728622436523,
"learning_rate": 9.523809523809523e-06,
"loss": 0.1866,
"step": 290
},
{
"epoch": 37.935483870967744,
"eval_accuracy": 0.8454545454545455,
"eval_loss": 0.4527719020843506,
"eval_runtime": 2.884,
"eval_samples_per_second": 38.141,
"eval_steps_per_second": 1.387,
"step": 294
},
{
"epoch": 38.70967741935484,
"grad_norm": 5.220081329345703,
"learning_rate": 7.936507936507936e-06,
"loss": 0.1664,
"step": 300
},
{
"epoch": 38.96774193548387,
"eval_accuracy": 0.8454545454545455,
"eval_loss": 0.45329469442367554,
"eval_runtime": 3.0675,
"eval_samples_per_second": 35.86,
"eval_steps_per_second": 1.304,
"step": 302
},
{
"epoch": 40.0,
"grad_norm": 6.373405933380127,
"learning_rate": 6.349206349206349e-06,
"loss": 0.1757,
"step": 310
},
{
"epoch": 40.0,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.4492085576057434,
"eval_runtime": 2.5073,
"eval_samples_per_second": 43.872,
"eval_steps_per_second": 1.595,
"step": 310
},
{
"epoch": 40.903225806451616,
"eval_accuracy": 0.8636363636363636,
"eval_loss": 0.44178226590156555,
"eval_runtime": 2.4731,
"eval_samples_per_second": 44.479,
"eval_steps_per_second": 1.617,
"step": 317
},
{
"epoch": 41.29032258064516,
"grad_norm": 6.021412372589111,
"learning_rate": 4.7619047619047615e-06,
"loss": 0.1542,
"step": 320
},
{
"epoch": 41.935483870967744,
"eval_accuracy": 0.8636363636363636,
"eval_loss": 0.4412206709384918,
"eval_runtime": 3.2551,
"eval_samples_per_second": 33.793,
"eval_steps_per_second": 1.229,
"step": 325
},
{
"epoch": 42.58064516129032,
"grad_norm": 6.342203617095947,
"learning_rate": 3.1746031746031746e-06,
"loss": 0.144,
"step": 330
},
{
"epoch": 42.96774193548387,
"eval_accuracy": 0.8545454545454545,
"eval_loss": 0.44375744462013245,
"eval_runtime": 2.4771,
"eval_samples_per_second": 44.406,
"eval_steps_per_second": 1.615,
"step": 333
},
{
"epoch": 43.87096774193548,
"grad_norm": 4.18281888961792,
"learning_rate": 1.5873015873015873e-06,
"loss": 0.1647,
"step": 340
},
{
"epoch": 44.0,
"eval_accuracy": 0.8636363636363636,
"eval_loss": 0.441061407327652,
"eval_runtime": 2.4882,
"eval_samples_per_second": 44.209,
"eval_steps_per_second": 1.608,
"step": 341
},
{
"epoch": 44.903225806451616,
"eval_accuracy": 0.8636363636363636,
"eval_loss": 0.43833261728286743,
"eval_runtime": 3.0681,
"eval_samples_per_second": 35.853,
"eval_steps_per_second": 1.304,
"step": 348
},
{
"epoch": 45.16129032258065,
"grad_norm": 5.460458278656006,
"learning_rate": 0.0,
"loss": 0.1418,
"step": 350
},
{
"epoch": 45.16129032258065,
"eval_accuracy": 0.8727272727272727,
"eval_loss": 0.4381871521472931,
"eval_runtime": 2.8317,
"eval_samples_per_second": 38.846,
"eval_steps_per_second": 1.413,
"step": 350
},
{
"epoch": 45.16129032258065,
"step": 350,
"total_flos": 1.1015215348522291e+18,
"train_loss": 0.5093351830754961,
"train_runtime": 1677.4202,
"train_samples_per_second": 29.241,
"train_steps_per_second": 0.209
}
],
"logging_steps": 10,
"max_steps": 350,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1015215348522291e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}