|
{ |
|
"best_metric": 1.0, |
|
"best_model_checkpoint": "./swin-soiral/checkpoint-72", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.7685546875, |
|
"eval_runtime": 0.2552, |
|
"eval_samples_per_second": 47.013, |
|
"eval_steps_per_second": 7.835, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7776, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.7459920048713684, |
|
"eval_runtime": 0.2163, |
|
"eval_samples_per_second": 55.485, |
|
"eval_steps_per_second": 9.247, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.7839, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 0.71142578125, |
|
"eval_runtime": 0.2223, |
|
"eval_samples_per_second": 53.982, |
|
"eval_steps_per_second": 8.997, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.6965, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.6708170771598816, |
|
"eval_runtime": 0.1987, |
|
"eval_samples_per_second": 60.404, |
|
"eval_steps_per_second": 10.067, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.75e-06, |
|
"loss": 0.6778, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.62982177734375, |
|
"eval_runtime": 0.1998, |
|
"eval_samples_per_second": 60.045, |
|
"eval_steps_per_second": 10.008, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.5903117060661316, |
|
"eval_runtime": 0.2941, |
|
"eval_samples_per_second": 40.798, |
|
"eval_steps_per_second": 6.8, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 6e-06, |
|
"loss": 0.6297, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.5383097529411316, |
|
"eval_runtime": 0.2066, |
|
"eval_samples_per_second": 58.089, |
|
"eval_steps_per_second": 9.682, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 7.25e-06, |
|
"loss": 0.5812, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.4876708984375, |
|
"eval_runtime": 0.2154, |
|
"eval_samples_per_second": 55.709, |
|
"eval_steps_per_second": 9.285, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 8.5e-06, |
|
"loss": 0.5141, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.42340087890625, |
|
"eval_runtime": 0.2045, |
|
"eval_samples_per_second": 58.685, |
|
"eval_steps_per_second": 9.781, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 9.75e-06, |
|
"loss": 0.4186, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.3495737612247467, |
|
"eval_runtime": 0.2143, |
|
"eval_samples_per_second": 55.994, |
|
"eval_steps_per_second": 9.332, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.2956085205078125, |
|
"eval_runtime": 0.204, |
|
"eval_samples_per_second": 58.832, |
|
"eval_steps_per_second": 9.805, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 9.88888888888889e-06, |
|
"loss": 0.3791, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.26324209570884705, |
|
"eval_runtime": 0.2276, |
|
"eval_samples_per_second": 52.73, |
|
"eval_steps_per_second": 8.788, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 9.75e-06, |
|
"loss": 0.2906, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.21923191845417023, |
|
"eval_runtime": 0.2866, |
|
"eval_samples_per_second": 41.866, |
|
"eval_steps_per_second": 6.978, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 9.611111111111112e-06, |
|
"loss": 0.2247, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.18271827697753906, |
|
"eval_runtime": 0.2073, |
|
"eval_samples_per_second": 57.879, |
|
"eval_steps_per_second": 9.647, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 9.472222222222223e-06, |
|
"loss": 0.1724, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.1536579132080078, |
|
"eval_runtime": 0.3096, |
|
"eval_samples_per_second": 38.763, |
|
"eval_steps_per_second": 6.461, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.11898771673440933, |
|
"eval_runtime": 0.207, |
|
"eval_samples_per_second": 57.977, |
|
"eval_steps_per_second": 9.663, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"learning_rate": 9.333333333333334e-06, |
|
"loss": 0.1077, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.12817128002643585, |
|
"eval_runtime": 0.2101, |
|
"eval_samples_per_second": 57.105, |
|
"eval_steps_per_second": 9.517, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 9.194444444444445e-06, |
|
"loss": 0.1336, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0798807144165039, |
|
"eval_runtime": 0.3334, |
|
"eval_samples_per_second": 35.996, |
|
"eval_steps_per_second": 5.999, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 9.083333333333333e-06, |
|
"loss": 0.0656, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.05580584332346916, |
|
"eval_runtime": 0.206, |
|
"eval_samples_per_second": 58.24, |
|
"eval_steps_per_second": 9.707, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 8.944444444444446e-06, |
|
"loss": 0.0564, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.046213943511247635, |
|
"eval_runtime": 0.3162, |
|
"eval_samples_per_second": 37.95, |
|
"eval_steps_per_second": 6.325, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.05256287381052971, |
|
"eval_runtime": 0.2134, |
|
"eval_samples_per_second": 56.23, |
|
"eval_steps_per_second": 9.372, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 21.25, |
|
"learning_rate": 8.805555555555557e-06, |
|
"loss": 0.0703, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.03809436038136482, |
|
"eval_runtime": 0.2, |
|
"eval_samples_per_second": 60.004, |
|
"eval_steps_per_second": 10.001, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"learning_rate": 8.666666666666668e-06, |
|
"loss": 0.044, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01835465431213379, |
|
"eval_runtime": 0.1962, |
|
"eval_samples_per_second": 61.174, |
|
"eval_steps_per_second": 10.196, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 23.75, |
|
"learning_rate": 8.527777777777779e-06, |
|
"loss": 0.0239, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.020169338211417198, |
|
"eval_runtime": 0.2151, |
|
"eval_samples_per_second": 55.779, |
|
"eval_steps_per_second": 9.297, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 8.38888888888889e-06, |
|
"loss": 0.0214, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.024330079555511475, |
|
"eval_runtime": 0.2292, |
|
"eval_samples_per_second": 52.363, |
|
"eval_steps_per_second": 8.727, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.011685073375701904, |
|
"eval_runtime": 0.2099, |
|
"eval_samples_per_second": 57.169, |
|
"eval_steps_per_second": 9.528, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 26.25, |
|
"learning_rate": 8.25e-06, |
|
"loss": 0.031, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.008951862342655659, |
|
"eval_runtime": 0.2024, |
|
"eval_samples_per_second": 59.277, |
|
"eval_steps_per_second": 9.88, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"learning_rate": 8.111111111111112e-06, |
|
"loss": 0.0334, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.03853602334856987, |
|
"eval_runtime": 0.2061, |
|
"eval_samples_per_second": 58.221, |
|
"eval_steps_per_second": 9.703, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 28.75, |
|
"learning_rate": 7.972222222222224e-06, |
|
"loss": 0.0046, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.03924691677093506, |
|
"eval_runtime": 0.3171, |
|
"eval_samples_per_second": 37.847, |
|
"eval_steps_per_second": 6.308, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 7.833333333333333e-06, |
|
"loss": 0.0051, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0031117696780711412, |
|
"eval_runtime": 0.217, |
|
"eval_samples_per_second": 55.303, |
|
"eval_steps_per_second": 9.217, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0025899012107402086, |
|
"eval_runtime": 0.318, |
|
"eval_samples_per_second": 37.734, |
|
"eval_steps_per_second": 6.289, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"learning_rate": 7.694444444444446e-06, |
|
"loss": 0.0045, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.002510249614715576, |
|
"eval_runtime": 0.2034, |
|
"eval_samples_per_second": 58.986, |
|
"eval_steps_per_second": 9.831, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"learning_rate": 7.555555555555556e-06, |
|
"loss": 0.0133, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.07476559281349182, |
|
"eval_runtime": 0.2934, |
|
"eval_samples_per_second": 40.893, |
|
"eval_steps_per_second": 6.816, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 33.75, |
|
"learning_rate": 7.416666666666668e-06, |
|
"loss": 0.0014, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.16315531730651855, |
|
"eval_runtime": 0.2053, |
|
"eval_samples_per_second": 58.463, |
|
"eval_steps_per_second": 9.744, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 7.277777777777778e-06, |
|
"loss": 0.0134, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.006152550224214792, |
|
"eval_runtime": 0.2135, |
|
"eval_samples_per_second": 56.219, |
|
"eval_steps_per_second": 9.37, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.005291670560836792, |
|
"eval_runtime": 0.2054, |
|
"eval_samples_per_second": 58.422, |
|
"eval_steps_per_second": 9.737, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 36.25, |
|
"learning_rate": 7.13888888888889e-06, |
|
"loss": 0.0365, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0365130789577961, |
|
"eval_runtime": 0.2268, |
|
"eval_samples_per_second": 52.911, |
|
"eval_steps_per_second": 8.819, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"learning_rate": 7e-06, |
|
"loss": 0.001, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0017459988594055176, |
|
"eval_runtime": 0.2143, |
|
"eval_samples_per_second": 56.005, |
|
"eval_steps_per_second": 9.334, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 38.75, |
|
"learning_rate": 6.88888888888889e-06, |
|
"loss": 0.0503, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.05090367794036865, |
|
"eval_runtime": 0.208, |
|
"eval_samples_per_second": 57.695, |
|
"eval_steps_per_second": 9.616, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 6.750000000000001e-06, |
|
"loss": 0.0094, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.07808921486139297, |
|
"eval_runtime": 0.2086, |
|
"eval_samples_per_second": 57.523, |
|
"eval_steps_per_second": 9.587, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.22892427444458008, |
|
"eval_runtime": 0.2158, |
|
"eval_samples_per_second": 55.599, |
|
"eval_steps_per_second": 9.267, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 41.25, |
|
"learning_rate": 6.6111111111111115e-06, |
|
"loss": 0.0231, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.16741518676280975, |
|
"eval_runtime": 0.2199, |
|
"eval_samples_per_second": 54.565, |
|
"eval_steps_per_second": 9.094, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"learning_rate": 6.472222222222223e-06, |
|
"loss": 0.0002, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.25974369049072266, |
|
"eval_runtime": 0.297, |
|
"eval_samples_per_second": 40.411, |
|
"eval_steps_per_second": 6.735, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 43.75, |
|
"learning_rate": 6.333333333333333e-06, |
|
"loss": 0.0785, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.2814592123031616, |
|
"eval_runtime": 0.1943, |
|
"eval_samples_per_second": 61.754, |
|
"eval_steps_per_second": 10.292, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 6.194444444444445e-06, |
|
"loss": 0.0009, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.2058378905057907, |
|
"eval_runtime": 0.3052, |
|
"eval_samples_per_second": 39.313, |
|
"eval_steps_per_second": 6.552, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.000926971435546875, |
|
"eval_runtime": 0.2098, |
|
"eval_samples_per_second": 57.192, |
|
"eval_steps_per_second": 9.532, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 46.25, |
|
"learning_rate": 6.055555555555555e-06, |
|
"loss": 0.0395, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0010882416972890496, |
|
"eval_runtime": 0.3081, |
|
"eval_samples_per_second": 38.948, |
|
"eval_steps_per_second": 6.491, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 47.5, |
|
"learning_rate": 5.916666666666667e-06, |
|
"loss": 0.0158, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.000389377266401425, |
|
"eval_runtime": 0.2087, |
|
"eval_samples_per_second": 57.493, |
|
"eval_steps_per_second": 9.582, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 48.75, |
|
"learning_rate": 5.777777777777778e-06, |
|
"loss": 0.001, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0034777026157826185, |
|
"eval_runtime": 0.2113, |
|
"eval_samples_per_second": 56.801, |
|
"eval_steps_per_second": 9.467, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 5.638888888888889e-06, |
|
"loss": 0.0027, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0018774966010823846, |
|
"eval_runtime": 0.1971, |
|
"eval_samples_per_second": 60.883, |
|
"eval_steps_per_second": 10.147, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0005501409177668393, |
|
"eval_runtime": 0.2093, |
|
"eval_samples_per_second": 57.332, |
|
"eval_steps_per_second": 9.555, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 51.25, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 0.0005, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0013666549930348992, |
|
"eval_runtime": 0.2027, |
|
"eval_samples_per_second": 59.215, |
|
"eval_steps_per_second": 9.869, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 52.5, |
|
"learning_rate": 5.361111111111112e-06, |
|
"loss": 0.003, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0018307765712961555, |
|
"eval_runtime": 0.2153, |
|
"eval_samples_per_second": 55.739, |
|
"eval_steps_per_second": 9.29, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 53.75, |
|
"learning_rate": 5.2222222222222226e-06, |
|
"loss": 0.0039, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01105162501335144, |
|
"eval_runtime": 0.2005, |
|
"eval_samples_per_second": 59.849, |
|
"eval_steps_per_second": 9.975, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 5.0833333333333335e-06, |
|
"loss": 0.0081, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.021254947409033775, |
|
"eval_runtime": 0.2174, |
|
"eval_samples_per_second": 55.192, |
|
"eval_steps_per_second": 9.199, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.014328837394714355, |
|
"eval_runtime": 0.2399, |
|
"eval_samples_per_second": 50.017, |
|
"eval_steps_per_second": 8.336, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 56.25, |
|
"learning_rate": 4.944444444444445e-06, |
|
"loss": 0.0001, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.00964482594281435, |
|
"eval_runtime": 0.2188, |
|
"eval_samples_per_second": 54.838, |
|
"eval_steps_per_second": 9.14, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 57.5, |
|
"learning_rate": 4.805555555555556e-06, |
|
"loss": 0.0002, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0017709335079416633, |
|
"eval_runtime": 0.2916, |
|
"eval_samples_per_second": 41.158, |
|
"eval_steps_per_second": 6.86, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 58.75, |
|
"learning_rate": 4.666666666666667e-06, |
|
"loss": 0.0035, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0005045731668360531, |
|
"eval_runtime": 0.2036, |
|
"eval_samples_per_second": 58.93, |
|
"eval_steps_per_second": 9.822, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 4.527777777777778e-06, |
|
"loss": 0.0049, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0014440218219533563, |
|
"eval_runtime": 0.3276, |
|
"eval_samples_per_second": 36.631, |
|
"eval_steps_per_second": 6.105, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0020179252605885267, |
|
"eval_runtime": 0.2179, |
|
"eval_samples_per_second": 55.076, |
|
"eval_steps_per_second": 9.179, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 61.25, |
|
"learning_rate": 4.388888888888889e-06, |
|
"loss": 0.0006, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0003785987792070955, |
|
"eval_runtime": 0.2267, |
|
"eval_samples_per_second": 52.937, |
|
"eval_steps_per_second": 8.823, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"learning_rate": 4.25e-06, |
|
"loss": 0.0001, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.00021861989807803184, |
|
"eval_runtime": 0.2108, |
|
"eval_samples_per_second": 56.934, |
|
"eval_steps_per_second": 9.489, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 63.75, |
|
"learning_rate": 4.111111111111111e-06, |
|
"loss": 0.0272, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0002175172121496871, |
|
"eval_runtime": 0.2182, |
|
"eval_samples_per_second": 55.001, |
|
"eval_steps_per_second": 9.167, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 3.972222222222223e-06, |
|
"loss": 0.0016, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0003321866097394377, |
|
"eval_runtime": 0.1986, |
|
"eval_samples_per_second": 60.421, |
|
"eval_steps_per_second": 10.07, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0012783011188730597, |
|
"eval_runtime": 0.2135, |
|
"eval_samples_per_second": 56.201, |
|
"eval_steps_per_second": 9.367, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 66.25, |
|
"learning_rate": 3.833333333333334e-06, |
|
"loss": 0.0002, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.004446456674486399, |
|
"eval_runtime": 0.2127, |
|
"eval_samples_per_second": 56.422, |
|
"eval_steps_per_second": 9.404, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 67.5, |
|
"learning_rate": 3.694444444444445e-06, |
|
"loss": 0.0001, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.013481984846293926, |
|
"eval_runtime": 0.2043, |
|
"eval_samples_per_second": 58.732, |
|
"eval_steps_per_second": 9.789, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 68.75, |
|
"learning_rate": 3.555555555555556e-06, |
|
"loss": 0.0002, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.012796193361282349, |
|
"eval_runtime": 0.2261, |
|
"eval_samples_per_second": 53.079, |
|
"eval_steps_per_second": 8.846, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 3.416666666666667e-06, |
|
"loss": 0.0002, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01079349685460329, |
|
"eval_runtime": 0.1991, |
|
"eval_samples_per_second": 60.277, |
|
"eval_steps_per_second": 10.046, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.008993417024612427, |
|
"eval_runtime": 0.2933, |
|
"eval_samples_per_second": 40.92, |
|
"eval_steps_per_second": 6.82, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 71.25, |
|
"learning_rate": 3.277777777777778e-06, |
|
"loss": 0.0001, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.006850639823824167, |
|
"eval_runtime": 0.2074, |
|
"eval_samples_per_second": 57.85, |
|
"eval_steps_per_second": 9.642, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 72.5, |
|
"learning_rate": 3.138888888888889e-06, |
|
"loss": 0.0001, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0049311816692352295, |
|
"eval_runtime": 0.2813, |
|
"eval_samples_per_second": 42.663, |
|
"eval_steps_per_second": 7.111, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 73.75, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0002, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0031823813915252686, |
|
"eval_runtime": 0.2076, |
|
"eval_samples_per_second": 57.79, |
|
"eval_steps_per_second": 9.632, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 2.861111111111111e-06, |
|
"loss": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0018174747237935662, |
|
"eval_runtime": 0.1974, |
|
"eval_samples_per_second": 60.778, |
|
"eval_steps_per_second": 10.13, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0033631324768066406, |
|
"eval_runtime": 0.2244, |
|
"eval_samples_per_second": 53.472, |
|
"eval_steps_per_second": 8.912, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 76.25, |
|
"learning_rate": 2.7222222222222224e-06, |
|
"loss": 0.0003, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.005034655332565308, |
|
"eval_runtime": 0.2054, |
|
"eval_samples_per_second": 58.436, |
|
"eval_steps_per_second": 9.739, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 77.5, |
|
"learning_rate": 2.5833333333333337e-06, |
|
"loss": 0.0001, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0060021779499948025, |
|
"eval_runtime": 0.2165, |
|
"eval_samples_per_second": 55.438, |
|
"eval_steps_per_second": 9.24, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 78.75, |
|
"learning_rate": 2.4444444444444447e-06, |
|
"loss": 0.0, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.006388425827026367, |
|
"eval_runtime": 0.2076, |
|
"eval_samples_per_second": 57.793, |
|
"eval_steps_per_second": 9.632, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 2.305555555555556e-06, |
|
"loss": 0.0001, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0061742267571389675, |
|
"eval_runtime": 0.2188, |
|
"eval_samples_per_second": 54.852, |
|
"eval_steps_per_second": 9.142, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.005842983722686768, |
|
"eval_runtime": 0.2181, |
|
"eval_samples_per_second": 55.023, |
|
"eval_steps_per_second": 9.171, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 81.25, |
|
"learning_rate": 2.166666666666667e-06, |
|
"loss": 0.0001, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0059954822063446045, |
|
"eval_runtime": 0.2026, |
|
"eval_samples_per_second": 59.232, |
|
"eval_steps_per_second": 9.872, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 82.5, |
|
"learning_rate": 2.027777777777778e-06, |
|
"loss": 0.0001, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.006137927528470755, |
|
"eval_runtime": 0.2087, |
|
"eval_samples_per_second": 57.509, |
|
"eval_steps_per_second": 9.585, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 83.75, |
|
"learning_rate": 1.888888888888889e-06, |
|
"loss": 0.0, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.007910847663879395, |
|
"eval_runtime": 0.2136, |
|
"eval_samples_per_second": 56.177, |
|
"eval_steps_per_second": 9.363, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 1.75e-06, |
|
"loss": 0.0005, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.012745578773319721, |
|
"eval_runtime": 0.2199, |
|
"eval_samples_per_second": 54.578, |
|
"eval_steps_per_second": 9.096, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.021893838420510292, |
|
"eval_runtime": 0.2149, |
|
"eval_samples_per_second": 55.845, |
|
"eval_steps_per_second": 9.307, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 86.25, |
|
"learning_rate": 1.6111111111111113e-06, |
|
"loss": 0.0002, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.031531721353530884, |
|
"eval_runtime": 0.2233, |
|
"eval_samples_per_second": 53.743, |
|
"eval_steps_per_second": 8.957, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 87.5, |
|
"learning_rate": 1.4722222222222225e-06, |
|
"loss": 0.0001, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.03811268135905266, |
|
"eval_runtime": 0.2096, |
|
"eval_samples_per_second": 57.255, |
|
"eval_steps_per_second": 9.542, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 88.75, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"loss": 0.0001, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.04021235182881355, |
|
"eval_runtime": 0.2105, |
|
"eval_samples_per_second": 57.008, |
|
"eval_steps_per_second": 9.501, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 1.1944444444444446e-06, |
|
"loss": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.040413498878479004, |
|
"eval_runtime": 0.2044, |
|
"eval_samples_per_second": 58.696, |
|
"eval_steps_per_second": 9.783, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0374186746776104, |
|
"eval_runtime": 0.316, |
|
"eval_samples_per_second": 37.973, |
|
"eval_steps_per_second": 6.329, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 91.25, |
|
"learning_rate": 1.0555555555555557e-06, |
|
"loss": 0.0, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.03140836954116821, |
|
"eval_runtime": 0.2142, |
|
"eval_samples_per_second": 56.034, |
|
"eval_steps_per_second": 9.339, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 92.5, |
|
"learning_rate": 9.166666666666666e-07, |
|
"loss": 0.0003, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.01769857667386532, |
|
"eval_runtime": 0.2018, |
|
"eval_samples_per_second": 59.478, |
|
"eval_steps_per_second": 9.913, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 93.75, |
|
"learning_rate": 7.777777777777779e-07, |
|
"loss": 0.0001, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.011687318794429302, |
|
"eval_runtime": 0.2027, |
|
"eval_samples_per_second": 59.211, |
|
"eval_steps_per_second": 9.869, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 6.388888888888889e-07, |
|
"loss": 0.0001, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0088284807279706, |
|
"eval_runtime": 0.2065, |
|
"eval_samples_per_second": 58.106, |
|
"eval_steps_per_second": 9.684, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.007670491933822632, |
|
"eval_runtime": 0.2024, |
|
"eval_samples_per_second": 59.303, |
|
"eval_steps_per_second": 9.884, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 96.25, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.0, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.00695762038230896, |
|
"eval_runtime": 0.2135, |
|
"eval_samples_per_second": 56.218, |
|
"eval_steps_per_second": 9.37, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 97.5, |
|
"learning_rate": 3.611111111111111e-07, |
|
"loss": 0.0001, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0064276158809661865, |
|
"eval_runtime": 0.2059, |
|
"eval_samples_per_second": 58.292, |
|
"eval_steps_per_second": 9.715, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 98.75, |
|
"learning_rate": 2.2222222222222224e-07, |
|
"loss": 0.0001, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.006080567836761475, |
|
"eval_runtime": 0.2091, |
|
"eval_samples_per_second": 57.386, |
|
"eval_steps_per_second": 9.564, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 8.333333333333334e-08, |
|
"loss": 0.0001, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.005917658563703299, |
|
"eval_runtime": 0.2205, |
|
"eval_samples_per_second": 54.429, |
|
"eval_steps_per_second": 9.072, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 400, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 4.70073333768192e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|