{
  "best_metric": 0.8732394366197183,
  "best_model_checkpoint": "deit-base-distilled-patch16-224-65-fold3/checkpoint-276",
  "epoch": 92.3076923076923,
  "eval_steps": 500,
  "global_step": 300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9230769230769231,
      "eval_accuracy": 0.5492957746478874,
      "eval_loss": 0.7402985692024231,
      "eval_runtime": 0.9976,
      "eval_samples_per_second": 71.17,
      "eval_steps_per_second": 3.007,
      "step": 3
    },
    {
      "epoch": 1.8461538461538463,
      "eval_accuracy": 0.5211267605633803,
      "eval_loss": 0.7199354767799377,
      "eval_runtime": 0.9611,
      "eval_samples_per_second": 73.873,
      "eval_steps_per_second": 3.121,
      "step": 6
    },
    {
      "epoch": 2.769230769230769,
      "eval_accuracy": 0.5633802816901409,
      "eval_loss": 0.7111433148384094,
      "eval_runtime": 0.9671,
      "eval_samples_per_second": 73.414,
      "eval_steps_per_second": 3.102,
      "step": 9
    },
    {
      "epoch": 3.076923076923077,
      "grad_norm": 7.598382949829102,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.7693,
      "step": 10
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.5352112676056338,
      "eval_loss": 0.7014583945274353,
      "eval_runtime": 0.9614,
      "eval_samples_per_second": 73.854,
      "eval_steps_per_second": 3.121,
      "step": 13
    },
    {
      "epoch": 4.923076923076923,
      "eval_accuracy": 0.6197183098591549,
      "eval_loss": 0.6470862627029419,
      "eval_runtime": 0.965,
      "eval_samples_per_second": 73.577,
      "eval_steps_per_second": 3.109,
      "step": 16
    },
    {
      "epoch": 5.846153846153846,
      "eval_accuracy": 0.6056338028169014,
      "eval_loss": 0.6691372394561768,
      "eval_runtime": 0.966,
      "eval_samples_per_second": 73.497,
      "eval_steps_per_second": 3.105,
      "step": 19
    },
    {
      "epoch": 6.153846153846154,
      "grad_norm": 1.975386381149292,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.6542,
      "step": 20
    },
    {
      "epoch": 6.769230769230769,
      "eval_accuracy": 0.6197183098591549,
      "eval_loss": 0.6188070774078369,
      "eval_runtime": 0.9657,
      "eval_samples_per_second": 73.52,
      "eval_steps_per_second": 3.106,
      "step": 22
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.5774647887323944,
      "eval_loss": 0.69666588306427,
      "eval_runtime": 0.9856,
      "eval_samples_per_second": 72.037,
      "eval_steps_per_second": 3.044,
      "step": 26
    },
    {
      "epoch": 8.923076923076923,
      "eval_accuracy": 0.7323943661971831,
      "eval_loss": 0.5731548070907593,
      "eval_runtime": 0.9661,
      "eval_samples_per_second": 73.491,
      "eval_steps_per_second": 3.105,
      "step": 29
    },
    {
      "epoch": 9.23076923076923,
      "grad_norm": 5.027002811431885,
      "learning_rate": 5e-05,
      "loss": 0.5935,
      "step": 30
    },
    {
      "epoch": 9.846153846153847,
      "eval_accuracy": 0.704225352112676,
      "eval_loss": 0.5184200406074524,
      "eval_runtime": 0.9721,
      "eval_samples_per_second": 73.035,
      "eval_steps_per_second": 3.086,
      "step": 32
    },
    {
      "epoch": 10.76923076923077,
      "eval_accuracy": 0.7183098591549296,
      "eval_loss": 0.6031274795532227,
      "eval_runtime": 0.976,
      "eval_samples_per_second": 72.75,
      "eval_steps_per_second": 3.074,
      "step": 35
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.647887323943662,
      "eval_loss": 0.6670598983764648,
      "eval_runtime": 0.9859,
      "eval_samples_per_second": 72.016,
      "eval_steps_per_second": 3.043,
      "step": 39
    },
    {
      "epoch": 12.307692307692308,
      "grad_norm": 7.941720008850098,
      "learning_rate": 4.814814814814815e-05,
      "loss": 0.549,
      "step": 40
    },
    {
      "epoch": 12.923076923076923,
      "eval_accuracy": 0.7183098591549296,
      "eval_loss": 0.5281336903572083,
      "eval_runtime": 1.0091,
      "eval_samples_per_second": 70.358,
      "eval_steps_per_second": 2.973,
      "step": 42
    },
    {
      "epoch": 13.846153846153847,
      "eval_accuracy": 0.7183098591549296,
      "eval_loss": 0.5792147517204285,
      "eval_runtime": 1.0732,
      "eval_samples_per_second": 66.158,
      "eval_steps_per_second": 2.795,
      "step": 45
    },
    {
      "epoch": 14.76923076923077,
      "eval_accuracy": 0.7464788732394366,
      "eval_loss": 0.5389305353164673,
      "eval_runtime": 1.015,
      "eval_samples_per_second": 69.952,
      "eval_steps_per_second": 2.956,
      "step": 48
    },
    {
      "epoch": 15.384615384615385,
      "grad_norm": 3.035529613494873,
      "learning_rate": 4.62962962962963e-05,
      "loss": 0.4778,
      "step": 50
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.704225352112676,
      "eval_loss": 0.600963830947876,
      "eval_runtime": 0.9862,
      "eval_samples_per_second": 71.992,
      "eval_steps_per_second": 3.042,
      "step": 52
    },
    {
      "epoch": 16.923076923076923,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.5244532227516174,
      "eval_runtime": 0.9743,
      "eval_samples_per_second": 72.876,
      "eval_steps_per_second": 3.079,
      "step": 55
    },
    {
      "epoch": 17.846153846153847,
      "eval_accuracy": 0.7183098591549296,
      "eval_loss": 0.5491234064102173,
      "eval_runtime": 0.997,
      "eval_samples_per_second": 71.212,
      "eval_steps_per_second": 3.009,
      "step": 58
    },
    {
      "epoch": 18.46153846153846,
      "grad_norm": 2.951366662979126,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.4039,
      "step": 60
    },
    {
      "epoch": 18.76923076923077,
      "eval_accuracy": 0.7464788732394366,
      "eval_loss": 0.55897057056427,
      "eval_runtime": 0.9926,
      "eval_samples_per_second": 71.527,
      "eval_steps_per_second": 3.022,
      "step": 61
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7323943661971831,
      "eval_loss": 0.4886135458946228,
      "eval_runtime": 0.9968,
      "eval_samples_per_second": 71.225,
      "eval_steps_per_second": 3.01,
      "step": 65
    },
    {
      "epoch": 20.923076923076923,
      "eval_accuracy": 0.7323943661971831,
      "eval_loss": 0.5049741268157959,
      "eval_runtime": 0.9885,
      "eval_samples_per_second": 71.822,
      "eval_steps_per_second": 3.035,
      "step": 68
    },
    {
      "epoch": 21.53846153846154,
      "grad_norm": 2.950965404510498,
      "learning_rate": 4.259259259259259e-05,
      "loss": 0.3409,
      "step": 70
    },
    {
      "epoch": 21.846153846153847,
      "eval_accuracy": 0.7464788732394366,
      "eval_loss": 0.49122846126556396,
      "eval_runtime": 0.9814,
      "eval_samples_per_second": 72.349,
      "eval_steps_per_second": 3.057,
      "step": 71
    },
    {
      "epoch": 22.76923076923077,
      "eval_accuracy": 0.7746478873239436,
      "eval_loss": 0.492927223443985,
      "eval_runtime": 0.9891,
      "eval_samples_per_second": 71.783,
      "eval_steps_per_second": 3.033,
      "step": 74
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7746478873239436,
      "eval_loss": 0.5365388989448547,
      "eval_runtime": 0.9902,
      "eval_samples_per_second": 71.704,
      "eval_steps_per_second": 3.03,
      "step": 78
    },
    {
      "epoch": 24.615384615384617,
      "grad_norm": 8.019288063049316,
      "learning_rate": 4.074074074074074e-05,
      "loss": 0.3202,
      "step": 80
    },
    {
      "epoch": 24.923076923076923,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.4685072600841522,
      "eval_runtime": 0.983,
      "eval_samples_per_second": 72.224,
      "eval_steps_per_second": 3.052,
      "step": 81
    },
    {
      "epoch": 25.846153846153847,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.44036272168159485,
      "eval_runtime": 0.9931,
      "eval_samples_per_second": 71.493,
      "eval_steps_per_second": 3.021,
      "step": 84
    },
    {
      "epoch": 26.76923076923077,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.463866263628006,
      "eval_runtime": 0.9949,
      "eval_samples_per_second": 71.364,
      "eval_steps_per_second": 3.015,
      "step": 87
    },
    {
      "epoch": 27.692307692307693,
      "grad_norm": 2.1704952716827393,
      "learning_rate": 3.888888888888889e-05,
      "loss": 0.2466,
      "step": 90
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.5491413474082947,
      "eval_runtime": 0.9936,
      "eval_samples_per_second": 71.461,
      "eval_steps_per_second": 3.019,
      "step": 91
    },
    {
      "epoch": 28.923076923076923,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.5169638991355896,
      "eval_runtime": 1.0004,
      "eval_samples_per_second": 70.97,
      "eval_steps_per_second": 2.999,
      "step": 94
    },
    {
      "epoch": 29.846153846153847,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.4444153606891632,
      "eval_runtime": 0.9937,
      "eval_samples_per_second": 71.452,
      "eval_steps_per_second": 3.019,
      "step": 97
    },
    {
      "epoch": 30.76923076923077,
      "grad_norm": 3.7423977851867676,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.2433,
      "step": 100
    },
    {
      "epoch": 30.76923076923077,
      "eval_accuracy": 0.8309859154929577,
      "eval_loss": 0.4516857862472534,
      "eval_runtime": 0.9934,
      "eval_samples_per_second": 71.473,
      "eval_steps_per_second": 3.02,
      "step": 100
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.7796855568885803,
      "eval_runtime": 0.9972,
      "eval_samples_per_second": 71.202,
      "eval_steps_per_second": 3.009,
      "step": 104
    },
    {
      "epoch": 32.92307692307692,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.43210193514823914,
      "eval_runtime": 0.9973,
      "eval_samples_per_second": 71.192,
      "eval_steps_per_second": 3.008,
      "step": 107
    },
    {
      "epoch": 33.84615384615385,
      "grad_norm": 4.042857646942139,
      "learning_rate": 3.518518518518519e-05,
      "loss": 0.2535,
      "step": 110
    },
    {
      "epoch": 33.84615384615385,
      "eval_accuracy": 0.7746478873239436,
      "eval_loss": 0.5956353545188904,
      "eval_runtime": 0.9936,
      "eval_samples_per_second": 71.457,
      "eval_steps_per_second": 3.019,
      "step": 110
    },
    {
      "epoch": 34.76923076923077,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.46952158212661743,
      "eval_runtime": 1.0048,
      "eval_samples_per_second": 70.661,
      "eval_steps_per_second": 2.986,
      "step": 113
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.6901408450704225,
      "eval_loss": 0.8105931878089905,
      "eval_runtime": 0.9983,
      "eval_samples_per_second": 71.118,
      "eval_steps_per_second": 3.005,
      "step": 117
    },
    {
      "epoch": 36.92307692307692,
      "grad_norm": 3.739529609680176,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.2215,
      "step": 120
    },
    {
      "epoch": 36.92307692307692,
      "eval_accuracy": 0.7464788732394366,
      "eval_loss": 0.7118771076202393,
      "eval_runtime": 0.9979,
      "eval_samples_per_second": 71.147,
      "eval_steps_per_second": 3.006,
      "step": 120
    },
    {
      "epoch": 37.84615384615385,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.4752153158187866,
      "eval_runtime": 0.9912,
      "eval_samples_per_second": 71.631,
      "eval_steps_per_second": 3.027,
      "step": 123
    },
    {
      "epoch": 38.76923076923077,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.4784373342990875,
      "eval_runtime": 0.9957,
      "eval_samples_per_second": 71.31,
      "eval_steps_per_second": 3.013,
      "step": 126
    },
    {
      "epoch": 40.0,
      "grad_norm": 3.5269320011138916,
      "learning_rate": 3.148148148148148e-05,
      "loss": 0.2143,
      "step": 130
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.4772735834121704,
      "eval_runtime": 1.0003,
      "eval_samples_per_second": 70.981,
      "eval_steps_per_second": 2.999,
      "step": 130
    },
    {
      "epoch": 40.92307692307692,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5580780506134033,
      "eval_runtime": 0.9949,
      "eval_samples_per_second": 71.366,
      "eval_steps_per_second": 3.015,
      "step": 133
    },
    {
      "epoch": 41.84615384615385,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.6098394989967346,
      "eval_runtime": 0.9964,
      "eval_samples_per_second": 71.254,
      "eval_steps_per_second": 3.011,
      "step": 136
    },
    {
      "epoch": 42.76923076923077,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5193454623222351,
      "eval_runtime": 0.9881,
      "eval_samples_per_second": 71.858,
      "eval_steps_per_second": 3.036,
      "step": 139
    },
    {
      "epoch": 43.07692307692308,
      "grad_norm": 2.791985273361206,
      "learning_rate": 2.962962962962963e-05,
      "loss": 0.1726,
      "step": 140
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.4306134879589081,
      "eval_runtime": 0.9976,
      "eval_samples_per_second": 71.173,
      "eval_steps_per_second": 3.007,
      "step": 143
    },
    {
      "epoch": 44.92307692307692,
      "eval_accuracy": 0.8591549295774648,
      "eval_loss": 0.4233701527118683,
      "eval_runtime": 0.9983,
      "eval_samples_per_second": 71.12,
      "eval_steps_per_second": 3.005,
      "step": 146
    },
    {
      "epoch": 45.84615384615385,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5263967514038086,
      "eval_runtime": 0.9942,
      "eval_samples_per_second": 71.411,
      "eval_steps_per_second": 3.017,
      "step": 149
    },
    {
      "epoch": 46.15384615384615,
      "grad_norm": 3.0404417514801025,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.1684,
      "step": 150
    },
    {
      "epoch": 46.76923076923077,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.7302807569503784,
      "eval_runtime": 0.9944,
      "eval_samples_per_second": 71.403,
      "eval_steps_per_second": 3.017,
      "step": 152
    },
    {
      "epoch": 48.0,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5079183578491211,
      "eval_runtime": 0.993,
      "eval_samples_per_second": 71.499,
      "eval_steps_per_second": 3.021,
      "step": 156
    },
    {
      "epoch": 48.92307692307692,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5391997694969177,
      "eval_runtime": 0.9941,
      "eval_samples_per_second": 71.421,
      "eval_steps_per_second": 3.018,
      "step": 159
    },
    {
      "epoch": 49.23076923076923,
      "grad_norm": 5.0057854652404785,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 0.1604,
      "step": 160
    },
    {
      "epoch": 49.84615384615385,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.3951334059238434,
      "eval_runtime": 0.9969,
      "eval_samples_per_second": 71.222,
      "eval_steps_per_second": 3.009,
      "step": 162
    },
    {
      "epoch": 50.76923076923077,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.43108686804771423,
      "eval_runtime": 1.0007,
      "eval_samples_per_second": 70.95,
      "eval_steps_per_second": 2.998,
      "step": 165
    },
    {
      "epoch": 52.0,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.40819185972213745,
      "eval_runtime": 1.0027,
      "eval_samples_per_second": 70.81,
      "eval_steps_per_second": 2.992,
      "step": 169
    },
    {
      "epoch": 52.30769230769231,
      "grad_norm": 2.250169515609741,
      "learning_rate": 2.4074074074074074e-05,
      "loss": 0.1457,
      "step": 170
    },
    {
      "epoch": 52.92307692307692,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.4173259139060974,
      "eval_runtime": 1.0625,
      "eval_samples_per_second": 66.825,
      "eval_steps_per_second": 2.824,
      "step": 172
    },
    {
      "epoch": 53.84615384615385,
      "eval_accuracy": 0.8309859154929577,
      "eval_loss": 0.4310729205608368,
      "eval_runtime": 1.0019,
      "eval_samples_per_second": 70.864,
      "eval_steps_per_second": 2.994,
      "step": 175
    },
    {
      "epoch": 54.76923076923077,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.42128264904022217,
      "eval_runtime": 1.0021,
      "eval_samples_per_second": 70.851,
      "eval_steps_per_second": 2.994,
      "step": 178
    },
    {
      "epoch": 55.38461538461539,
      "grad_norm": 3.804135799407959,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.1549,
      "step": 180
    },
    {
      "epoch": 56.0,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.4712894558906555,
      "eval_runtime": 0.9989,
      "eval_samples_per_second": 71.076,
      "eval_steps_per_second": 3.003,
      "step": 182
    },
    {
      "epoch": 56.92307692307692,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.749286949634552,
      "eval_runtime": 0.9921,
      "eval_samples_per_second": 71.569,
      "eval_steps_per_second": 3.024,
      "step": 185
    },
    {
      "epoch": 57.84615384615385,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.5160987973213196,
      "eval_runtime": 0.9878,
      "eval_samples_per_second": 71.876,
      "eval_steps_per_second": 3.037,
      "step": 188
    },
    {
      "epoch": 58.46153846153846,
      "grad_norm": 2.6206445693969727,
      "learning_rate": 2.037037037037037e-05,
      "loss": 0.1391,
      "step": 190
    },
    {
      "epoch": 58.76923076923077,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.46853163838386536,
      "eval_runtime": 0.9872,
      "eval_samples_per_second": 71.917,
      "eval_steps_per_second": 3.039,
      "step": 191
    },
    {
      "epoch": 60.0,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.6968369483947754,
      "eval_runtime": 0.9894,
      "eval_samples_per_second": 71.763,
      "eval_steps_per_second": 3.032,
      "step": 195
    },
    {
      "epoch": 60.92307692307692,
      "eval_accuracy": 0.8309859154929577,
      "eval_loss": 0.5836513638496399,
      "eval_runtime": 0.9857,
      "eval_samples_per_second": 72.029,
      "eval_steps_per_second": 3.043,
      "step": 198
    },
    {
      "epoch": 61.53846153846154,
      "grad_norm": 1.732333779335022,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.1272,
      "step": 200
    },
    {
      "epoch": 61.84615384615385,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.5863271951675415,
      "eval_runtime": 0.9938,
      "eval_samples_per_second": 71.442,
      "eval_steps_per_second": 3.019,
      "step": 201
    },
    {
      "epoch": 62.76923076923077,
      "eval_accuracy": 0.8309859154929577,
      "eval_loss": 0.5459548830986023,
      "eval_runtime": 0.9852,
      "eval_samples_per_second": 72.065,
      "eval_steps_per_second": 3.045,
      "step": 204
    },
    {
      "epoch": 64.0,
      "eval_accuracy": 0.8309859154929577,
      "eval_loss": 0.6198180317878723,
      "eval_runtime": 0.9942,
      "eval_samples_per_second": 71.415,
      "eval_steps_per_second": 3.018,
      "step": 208
    },
    {
      "epoch": 64.61538461538461,
      "grad_norm": 3.675976514816284,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.1341,
      "step": 210
    },
    {
      "epoch": 64.92307692307692,
      "eval_accuracy": 0.8591549295774648,
      "eval_loss": 0.558370053768158,
      "eval_runtime": 1.01,
      "eval_samples_per_second": 70.294,
      "eval_steps_per_second": 2.97,
      "step": 211
    },
    {
      "epoch": 65.84615384615384,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.6429418325424194,
      "eval_runtime": 0.992,
      "eval_samples_per_second": 71.571,
      "eval_steps_per_second": 3.024,
      "step": 214
    },
    {
      "epoch": 66.76923076923077,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.8592150211334229,
      "eval_runtime": 1.0035,
      "eval_samples_per_second": 70.751,
      "eval_steps_per_second": 2.989,
      "step": 217
    },
    {
      "epoch": 67.6923076923077,
      "grad_norm": 1.6102505922317505,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 0.1144,
      "step": 220
    },
    {
      "epoch": 68.0,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.8471563458442688,
      "eval_runtime": 0.989,
      "eval_samples_per_second": 71.787,
      "eval_steps_per_second": 3.033,
      "step": 221
    },
    {
      "epoch": 68.92307692307692,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.83599853515625,
      "eval_runtime": 0.9959,
      "eval_samples_per_second": 71.291,
      "eval_steps_per_second": 3.012,
      "step": 224
    },
    {
      "epoch": 69.84615384615384,
      "eval_accuracy": 0.8169014084507042,
      "eval_loss": 0.6696776151657104,
      "eval_runtime": 0.9955,
      "eval_samples_per_second": 71.323,
      "eval_steps_per_second": 3.014,
      "step": 227
    },
    {
      "epoch": 70.76923076923077,
      "grad_norm": 2.509876012802124,
      "learning_rate": 1.2962962962962962e-05,
      "loss": 0.1321,
      "step": 230
    },
    {
      "epoch": 70.76923076923077,
      "eval_accuracy": 0.8028169014084507,
      "eval_loss": 0.6625356674194336,
      "eval_runtime": 0.9946,
      "eval_samples_per_second": 71.389,
      "eval_steps_per_second": 3.016,
      "step": 230
    },
    {
      "epoch": 72.0,
      "eval_accuracy": 0.8309859154929577,
      "eval_loss": 0.7227580547332764,
      "eval_runtime": 0.9869,
      "eval_samples_per_second": 71.944,
      "eval_steps_per_second": 3.04,
      "step": 234
    },
    {
      "epoch": 72.92307692307692,
      "eval_accuracy": 0.8309859154929577,
      "eval_loss": 0.6792935729026794,
      "eval_runtime": 0.9957,
      "eval_samples_per_second": 71.304,
      "eval_steps_per_second": 3.013,
      "step": 237
    },
    {
      "epoch": 73.84615384615384,
      "grad_norm": 3.0507752895355225,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.1206,
      "step": 240
    },
    {
      "epoch": 73.84615384615384,
      "eval_accuracy": 0.8591549295774648,
      "eval_loss": 0.5570999979972839,
      "eval_runtime": 0.9893,
      "eval_samples_per_second": 71.771,
      "eval_steps_per_second": 3.033,
      "step": 240
    },
    {
      "epoch": 74.76923076923077,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.5105892419815063,
      "eval_runtime": 1.0212,
      "eval_samples_per_second": 69.527,
      "eval_steps_per_second": 2.938,
      "step": 243
    },
    {
      "epoch": 76.0,
      "eval_accuracy": 0.8309859154929577,
      "eval_loss": 0.6685615181922913,
      "eval_runtime": 1.0036,
      "eval_samples_per_second": 70.748,
      "eval_steps_per_second": 2.989,
      "step": 247
    },
    {
      "epoch": 76.92307692307692,
      "grad_norm": 3.6703972816467285,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.131,
      "step": 250
    },
    {
      "epoch": 76.92307692307692,
      "eval_accuracy": 0.8309859154929577,
      "eval_loss": 0.7132428288459778,
      "eval_runtime": 0.9879,
      "eval_samples_per_second": 71.873,
      "eval_steps_per_second": 3.037,
      "step": 250
    },
    {
      "epoch": 77.84615384615384,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.5945414900779724,
      "eval_runtime": 0.9943,
      "eval_samples_per_second": 71.404,
      "eval_steps_per_second": 3.017,
      "step": 253
    },
    {
      "epoch": 78.76923076923077,
      "eval_accuracy": 0.7746478873239436,
      "eval_loss": 0.5515730977058411,
      "eval_runtime": 1.0089,
      "eval_samples_per_second": 70.372,
      "eval_steps_per_second": 2.973,
      "step": 256
    },
    {
      "epoch": 80.0,
      "grad_norm": 1.8021163940429688,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 0.1009,
      "step": 260
    },
    {
      "epoch": 80.0,
      "eval_accuracy": 0.7605633802816901,
      "eval_loss": 0.547419011592865,
      "eval_runtime": 1.0062,
      "eval_samples_per_second": 70.563,
      "eval_steps_per_second": 2.982,
      "step": 260
    },
    {
      "epoch": 80.92307692307692,
      "eval_accuracy": 0.7887323943661971,
      "eval_loss": 0.5218686461448669,
      "eval_runtime": 0.9971,
      "eval_samples_per_second": 71.207,
      "eval_steps_per_second": 3.009,
      "step": 263
    },
    {
      "epoch": 81.84615384615384,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.5374658107757568,
      "eval_runtime": 0.9899,
      "eval_samples_per_second": 71.721,
      "eval_steps_per_second": 3.03,
      "step": 266
    },
    {
      "epoch": 82.76923076923077,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.5133435726165771,
      "eval_runtime": 0.9965,
      "eval_samples_per_second": 71.253,
      "eval_steps_per_second": 3.011,
      "step": 269
    },
    {
      "epoch": 83.07692307692308,
      "grad_norm": 1.6209955215454102,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.1084,
      "step": 270
    },
    {
      "epoch": 84.0,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.49109983444213867,
      "eval_runtime": 0.9882,
      "eval_samples_per_second": 71.848,
      "eval_steps_per_second": 3.036,
      "step": 273
    },
    {
      "epoch": 84.92307692307692,
      "eval_accuracy": 0.8732394366197183,
      "eval_loss": 0.49927499890327454,
      "eval_runtime": 1.0035,
      "eval_samples_per_second": 70.749,
      "eval_steps_per_second": 2.989,
      "step": 276
    },
    {
      "epoch": 85.84615384615384,
      "eval_accuracy": 0.8591549295774648,
      "eval_loss": 0.5418108701705933,
      "eval_runtime": 0.9918,
      "eval_samples_per_second": 71.588,
      "eval_steps_per_second": 3.025,
      "step": 279
    },
    {
      "epoch": 86.15384615384616,
      "grad_norm": 2.604188919067383,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.0851,
      "step": 280
    },
    {
      "epoch": 86.76923076923077,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.6010194420814514,
      "eval_runtime": 0.998,
      "eval_samples_per_second": 71.143,
      "eval_steps_per_second": 3.006,
      "step": 282
    },
    {
      "epoch": 88.0,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.6304585933685303,
      "eval_runtime": 0.9947,
      "eval_samples_per_second": 71.377,
      "eval_steps_per_second": 3.016,
      "step": 286
    },
    {
      "epoch": 88.92307692307692,
      "eval_accuracy": 0.8450704225352113,
      "eval_loss": 0.6016303300857544,
      "eval_runtime": 0.9919,
      "eval_samples_per_second": 71.58,
      "eval_steps_per_second": 3.025,
      "step": 289
    },
    {
      "epoch": 89.23076923076923,
      "grad_norm": 2.3317203521728516,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 0.1071,
      "step": 290
    },
    {
      "epoch": 89.84615384615384,
      "eval_accuracy": 0.8591549295774648,
      "eval_loss": 0.577277660369873,
      "eval_runtime": 0.9927,
      "eval_samples_per_second": 71.522,
      "eval_steps_per_second": 3.022,
      "step": 292
    },
    {
      "epoch": 90.76923076923077,
      "eval_accuracy": 0.8732394366197183,
      "eval_loss": 0.5610097646713257,
      "eval_runtime": 1.0072,
      "eval_samples_per_second": 70.491,
      "eval_steps_per_second": 2.978,
      "step": 295
    },
    {
      "epoch": 92.0,
      "eval_accuracy": 0.8732394366197183,
      "eval_loss": 0.5522463917732239,
      "eval_runtime": 0.9964,
      "eval_samples_per_second": 71.258,
      "eval_steps_per_second": 3.011,
      "step": 299
    },
    {
      "epoch": 92.3076923076923,
      "grad_norm": 2.7261242866516113,
      "learning_rate": 0.0,
      "loss": 0.1139,
      "step": 300
    },
    {
      "epoch": 92.3076923076923,
      "eval_accuracy": 0.8732394366197183,
      "eval_loss": 0.5514280796051025,
      "eval_runtime": 1.0006,
      "eval_samples_per_second": 70.955,
      "eval_steps_per_second": 2.998,
      "step": 300
    },
    {
      "epoch": 92.3076923076923,
      "step": 300,
      "total_flos": 2.8402872494292173e+18,
      "train_loss": 0.2501216806968053,
      "train_runtime": 1631.2353,
      "train_samples_per_second": 24.337,
      "train_steps_per_second": 0.184
    },
    {
      "epoch": 92.3076923076923,
      "eval_accuracy": 0.8732394366197183,
      "eval_loss": 0.49927499890327454,
      "eval_runtime": 1.0275,
      "eval_samples_per_second": 69.097,
      "eval_steps_per_second": 2.92,
      "step": 300
    }
  ],
  "logging_steps": 10,
  "max_steps": 300,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 100,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.8402872494292173e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}