|
{ |
|
"best_metric": 0.8727272727272727, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-350", |
|
"epoch": 45.16129032258065, |
|
"eval_steps": 500, |
|
"global_step": 350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9032258064516129, |
|
"eval_accuracy": 0.2, |
|
"eval_loss": 2.3726940155029297, |
|
"eval_runtime": 2.5133, |
|
"eval_samples_per_second": 43.767, |
|
"eval_steps_per_second": 1.592, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.2903225806451613, |
|
"grad_norm": 6.954860687255859, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 2.3966, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.935483870967742, |
|
"eval_accuracy": 0.3181818181818182, |
|
"eval_loss": 2.2909836769104004, |
|
"eval_runtime": 2.5816, |
|
"eval_samples_per_second": 42.609, |
|
"eval_steps_per_second": 1.549, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.5806451612903225, |
|
"grad_norm": 5.919530391693115, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 2.3131, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.967741935483871, |
|
"eval_accuracy": 0.4090909090909091, |
|
"eval_loss": 2.1218321323394775, |
|
"eval_runtime": 2.4729, |
|
"eval_samples_per_second": 44.483, |
|
"eval_steps_per_second": 1.618, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.870967741935484, |
|
"grad_norm": 8.89976692199707, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 2.072, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.45454545454545453, |
|
"eval_loss": 1.8349428176879883, |
|
"eval_runtime": 2.5693, |
|
"eval_samples_per_second": 42.813, |
|
"eval_steps_per_second": 1.557, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.903225806451613, |
|
"eval_accuracy": 0.5363636363636364, |
|
"eval_loss": 1.463451862335205, |
|
"eval_runtime": 2.638, |
|
"eval_samples_per_second": 41.698, |
|
"eval_steps_per_second": 1.516, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.161290322580645, |
|
"grad_norm": 12.236054420471191, |
|
"learning_rate": 4.9206349206349204e-05, |
|
"loss": 1.5528, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 5.935483870967742, |
|
"eval_accuracy": 0.6636363636363637, |
|
"eval_loss": 1.103641390800476, |
|
"eval_runtime": 2.4384, |
|
"eval_samples_per_second": 45.111, |
|
"eval_steps_per_second": 1.64, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.451612903225806, |
|
"grad_norm": 10.842184066772461, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 1.0472, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 6.967741935483871, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 0.9272710680961609, |
|
"eval_runtime": 3.0352, |
|
"eval_samples_per_second": 36.241, |
|
"eval_steps_per_second": 1.318, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.741935483870968, |
|
"grad_norm": 10.389890670776367, |
|
"learning_rate": 4.603174603174603e-05, |
|
"loss": 0.7989, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7909090909090909, |
|
"eval_loss": 0.8008114695549011, |
|
"eval_runtime": 2.4733, |
|
"eval_samples_per_second": 44.475, |
|
"eval_steps_per_second": 1.617, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.903225806451612, |
|
"eval_accuracy": 0.7818181818181819, |
|
"eval_loss": 0.7358732223510742, |
|
"eval_runtime": 2.439, |
|
"eval_samples_per_second": 45.101, |
|
"eval_steps_per_second": 1.64, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.03225806451613, |
|
"grad_norm": 8.742050170898438, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.604, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 9.935483870967742, |
|
"eval_accuracy": 0.7909090909090909, |
|
"eval_loss": 0.7282811403274536, |
|
"eval_runtime": 3.311, |
|
"eval_samples_per_second": 33.223, |
|
"eval_steps_per_second": 1.208, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.32258064516129, |
|
"grad_norm": 11.235774993896484, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 0.5228, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 10.967741935483872, |
|
"eval_accuracy": 0.8363636363636363, |
|
"eval_loss": 0.5896822214126587, |
|
"eval_runtime": 2.4309, |
|
"eval_samples_per_second": 45.251, |
|
"eval_steps_per_second": 1.646, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 11.612903225806452, |
|
"grad_norm": 10.12472915649414, |
|
"learning_rate": 4.126984126984127e-05, |
|
"loss": 0.4734, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 0.6503061652183533, |
|
"eval_runtime": 2.482, |
|
"eval_samples_per_second": 44.32, |
|
"eval_steps_per_second": 1.612, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.903225806451612, |
|
"grad_norm": 5.467583179473877, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 0.3987, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 12.903225806451612, |
|
"eval_accuracy": 0.8272727272727273, |
|
"eval_loss": 0.5785130858421326, |
|
"eval_runtime": 2.5549, |
|
"eval_samples_per_second": 43.054, |
|
"eval_steps_per_second": 1.566, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.935483870967742, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 0.6091312766075134, |
|
"eval_runtime": 2.4459, |
|
"eval_samples_per_second": 44.973, |
|
"eval_steps_per_second": 1.635, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.193548387096774, |
|
"grad_norm": 8.375397682189941, |
|
"learning_rate": 3.809523809523809e-05, |
|
"loss": 0.3742, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 14.967741935483872, |
|
"eval_accuracy": 0.8454545454545455, |
|
"eval_loss": 0.5278283953666687, |
|
"eval_runtime": 3.0723, |
|
"eval_samples_per_second": 35.804, |
|
"eval_steps_per_second": 1.302, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.483870967741936, |
|
"grad_norm": 9.191360473632812, |
|
"learning_rate": 3.650793650793651e-05, |
|
"loss": 0.3588, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.5279448628425598, |
|
"eval_runtime": 2.7269, |
|
"eval_samples_per_second": 40.339, |
|
"eval_steps_per_second": 1.467, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.774193548387096, |
|
"grad_norm": 10.030477523803711, |
|
"learning_rate": 3.492063492063492e-05, |
|
"loss": 0.3536, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 16.903225806451612, |
|
"eval_accuracy": 0.8363636363636363, |
|
"eval_loss": 0.5189336538314819, |
|
"eval_runtime": 2.4748, |
|
"eval_samples_per_second": 44.447, |
|
"eval_steps_per_second": 1.616, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.93548387096774, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.5036130547523499, |
|
"eval_runtime": 3.2666, |
|
"eval_samples_per_second": 33.674, |
|
"eval_steps_per_second": 1.225, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.06451612903226, |
|
"grad_norm": 5.293987274169922, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.331, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 18.967741935483872, |
|
"eval_accuracy": 0.8363636363636363, |
|
"eval_loss": 0.5327084064483643, |
|
"eval_runtime": 2.49, |
|
"eval_samples_per_second": 44.178, |
|
"eval_steps_per_second": 1.606, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 19.35483870967742, |
|
"grad_norm": 4.783825397491455, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.2836, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.47167953848838806, |
|
"eval_runtime": 2.4734, |
|
"eval_samples_per_second": 44.474, |
|
"eval_steps_per_second": 1.617, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.64516129032258, |
|
"grad_norm": 6.62587308883667, |
|
"learning_rate": 3.0158730158730158e-05, |
|
"loss": 0.2785, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 20.903225806451612, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.4598047435283661, |
|
"eval_runtime": 2.8824, |
|
"eval_samples_per_second": 38.162, |
|
"eval_steps_per_second": 1.388, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.93548387096774, |
|
"grad_norm": 8.891733169555664, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 0.2439, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 21.93548387096774, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.4782707691192627, |
|
"eval_runtime": 2.4502, |
|
"eval_samples_per_second": 44.894, |
|
"eval_steps_per_second": 1.632, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.967741935483872, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.4948057234287262, |
|
"eval_runtime": 2.6324, |
|
"eval_samples_per_second": 41.787, |
|
"eval_steps_per_second": 1.52, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.225806451612904, |
|
"grad_norm": 8.404036521911621, |
|
"learning_rate": 2.6984126984126984e-05, |
|
"loss": 0.2779, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8454545454545455, |
|
"eval_loss": 0.48835644125938416, |
|
"eval_runtime": 2.483, |
|
"eval_samples_per_second": 44.301, |
|
"eval_steps_per_second": 1.611, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.516129032258064, |
|
"grad_norm": 3.865527629852295, |
|
"learning_rate": 2.5396825396825397e-05, |
|
"loss": 0.2167, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 24.903225806451612, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.5084207057952881, |
|
"eval_runtime": 2.4768, |
|
"eval_samples_per_second": 44.413, |
|
"eval_steps_per_second": 1.615, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.806451612903224, |
|
"grad_norm": 4.576181888580322, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.2164, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 25.93548387096774, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.471531480550766, |
|
"eval_runtime": 3.2991, |
|
"eval_samples_per_second": 33.342, |
|
"eval_steps_per_second": 1.212, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.967741935483872, |
|
"eval_accuracy": 0.8272727272727273, |
|
"eval_loss": 0.5503013730049133, |
|
"eval_runtime": 2.5348, |
|
"eval_samples_per_second": 43.396, |
|
"eval_steps_per_second": 1.578, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.096774193548388, |
|
"grad_norm": 9.690849304199219, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.2342, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8272727272727273, |
|
"eval_loss": 0.49801039695739746, |
|
"eval_runtime": 2.4694, |
|
"eval_samples_per_second": 44.545, |
|
"eval_steps_per_second": 1.62, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.387096774193548, |
|
"grad_norm": 6.414791107177734, |
|
"learning_rate": 2.0634920634920636e-05, |
|
"loss": 0.216, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 28.903225806451612, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.4240585267543793, |
|
"eval_runtime": 2.5761, |
|
"eval_samples_per_second": 42.7, |
|
"eval_steps_per_second": 1.553, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.677419354838708, |
|
"grad_norm": 7.521275043487549, |
|
"learning_rate": 1.9047619047619046e-05, |
|
"loss": 0.1986, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 29.93548387096774, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.4466007649898529, |
|
"eval_runtime": 2.468, |
|
"eval_samples_per_second": 44.571, |
|
"eval_steps_per_second": 1.621, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.967741935483872, |
|
"grad_norm": 4.934516429901123, |
|
"learning_rate": 1.746031746031746e-05, |
|
"loss": 0.1919, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.967741935483872, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.4557681381702423, |
|
"eval_runtime": 2.9585, |
|
"eval_samples_per_second": 37.181, |
|
"eval_steps_per_second": 1.352, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.43898770213127136, |
|
"eval_runtime": 2.4694, |
|
"eval_samples_per_second": 44.544, |
|
"eval_steps_per_second": 1.62, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.25806451612903, |
|
"grad_norm": 8.577577590942383, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 0.1958, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 32.903225806451616, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.43786120414733887, |
|
"eval_runtime": 2.4873, |
|
"eval_samples_per_second": 44.224, |
|
"eval_steps_per_second": 1.608, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.54838709677419, |
|
"grad_norm": 6.008741855621338, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 0.1693, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 33.935483870967744, |
|
"eval_accuracy": 0.8454545454545455, |
|
"eval_loss": 0.442380428314209, |
|
"eval_runtime": 3.2712, |
|
"eval_samples_per_second": 33.627, |
|
"eval_steps_per_second": 1.223, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.83870967741935, |
|
"grad_norm": 7.021655559539795, |
|
"learning_rate": 1.2698412698412699e-05, |
|
"loss": 0.2158, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 34.96774193548387, |
|
"eval_accuracy": 0.8363636363636363, |
|
"eval_loss": 0.45243895053863525, |
|
"eval_runtime": 2.4917, |
|
"eval_samples_per_second": 44.146, |
|
"eval_steps_per_second": 1.605, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.4387998878955841, |
|
"eval_runtime": 2.6925, |
|
"eval_samples_per_second": 40.854, |
|
"eval_steps_per_second": 1.486, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.12903225806452, |
|
"grad_norm": 7.29483699798584, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.1578, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 36.903225806451616, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.4327390491962433, |
|
"eval_runtime": 2.5946, |
|
"eval_samples_per_second": 42.395, |
|
"eval_steps_per_second": 1.542, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 37.41935483870968, |
|
"grad_norm": 4.809728622436523, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 0.1866, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 37.935483870967744, |
|
"eval_accuracy": 0.8454545454545455, |
|
"eval_loss": 0.4527719020843506, |
|
"eval_runtime": 2.884, |
|
"eval_samples_per_second": 38.141, |
|
"eval_steps_per_second": 1.387, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 38.70967741935484, |
|
"grad_norm": 5.220081329345703, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.1664, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 38.96774193548387, |
|
"eval_accuracy": 0.8454545454545455, |
|
"eval_loss": 0.45329469442367554, |
|
"eval_runtime": 3.0675, |
|
"eval_samples_per_second": 35.86, |
|
"eval_steps_per_second": 1.304, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 6.373405933380127, |
|
"learning_rate": 6.349206349206349e-06, |
|
"loss": 0.1757, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.4492085576057434, |
|
"eval_runtime": 2.5073, |
|
"eval_samples_per_second": 43.872, |
|
"eval_steps_per_second": 1.595, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 40.903225806451616, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.44178226590156555, |
|
"eval_runtime": 2.4731, |
|
"eval_samples_per_second": 44.479, |
|
"eval_steps_per_second": 1.617, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 41.29032258064516, |
|
"grad_norm": 6.021412372589111, |
|
"learning_rate": 4.7619047619047615e-06, |
|
"loss": 0.1542, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 41.935483870967744, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.4412206709384918, |
|
"eval_runtime": 3.2551, |
|
"eval_samples_per_second": 33.793, |
|
"eval_steps_per_second": 1.229, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 42.58064516129032, |
|
"grad_norm": 6.342203617095947, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"loss": 0.144, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 42.96774193548387, |
|
"eval_accuracy": 0.8545454545454545, |
|
"eval_loss": 0.44375744462013245, |
|
"eval_runtime": 2.4771, |
|
"eval_samples_per_second": 44.406, |
|
"eval_steps_per_second": 1.615, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 43.87096774193548, |
|
"grad_norm": 4.18281888961792, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"loss": 0.1647, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.441061407327652, |
|
"eval_runtime": 2.4882, |
|
"eval_samples_per_second": 44.209, |
|
"eval_steps_per_second": 1.608, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 44.903225806451616, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.43833261728286743, |
|
"eval_runtime": 3.0681, |
|
"eval_samples_per_second": 35.853, |
|
"eval_steps_per_second": 1.304, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 45.16129032258065, |
|
"grad_norm": 5.460458278656006, |
|
"learning_rate": 0.0, |
|
"loss": 0.1418, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 45.16129032258065, |
|
"eval_accuracy": 0.8727272727272727, |
|
"eval_loss": 0.4381871521472931, |
|
"eval_runtime": 2.8317, |
|
"eval_samples_per_second": 38.846, |
|
"eval_steps_per_second": 1.413, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 45.16129032258065, |
|
"step": 350, |
|
"total_flos": 1.1015215348522291e+18, |
|
"train_loss": 0.5093351830754961, |
|
"train_runtime": 1677.4202, |
|
"train_samples_per_second": 29.241, |
|
"train_steps_per_second": 0.209 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1015215348522291e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|