|
{ |
|
"best_metric": 0.7954939341421143, |
|
"best_model_checkpoint": "cvt-13-normal/checkpoint-700", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7105719237435009, |
|
"eval_loss": 1.0209927558898926, |
|
"eval_runtime": 17.9138, |
|
"eval_samples_per_second": 64.419, |
|
"eval_steps_per_second": 0.558, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 7.967917442321777, |
|
"learning_rate": 4.285714285714285e-05, |
|
"loss": 5.5642, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7097053726169844, |
|
"eval_loss": 1.0071666240692139, |
|
"eval_runtime": 17.7355, |
|
"eval_samples_per_second": 65.067, |
|
"eval_steps_per_second": 0.564, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 8.133280754089355, |
|
"learning_rate": 8.57142857142857e-05, |
|
"loss": 5.662, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.708838821490468, |
|
"eval_loss": 1.0150678157806396, |
|
"eval_runtime": 17.8577, |
|
"eval_samples_per_second": 64.622, |
|
"eval_steps_per_second": 0.56, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7140381282495667, |
|
"eval_loss": 1.0016363859176636, |
|
"eval_runtime": 17.837, |
|
"eval_samples_per_second": 64.697, |
|
"eval_steps_per_second": 0.561, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 8.433135986328125, |
|
"learning_rate": 0.00012857142857142855, |
|
"loss": 5.381, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7123050259965338, |
|
"eval_loss": 1.0119163990020752, |
|
"eval_runtime": 17.7345, |
|
"eval_samples_per_second": 65.071, |
|
"eval_steps_per_second": 0.564, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 9.856744766235352, |
|
"learning_rate": 0.0001714285714285714, |
|
"loss": 5.3348, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.720103986135182, |
|
"eval_loss": 0.9661750793457031, |
|
"eval_runtime": 17.9039, |
|
"eval_samples_per_second": 64.455, |
|
"eval_steps_per_second": 0.559, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7261698440207972, |
|
"eval_loss": 0.9513705372810364, |
|
"eval_runtime": 17.8649, |
|
"eval_samples_per_second": 64.596, |
|
"eval_steps_per_second": 0.56, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"grad_norm": 10.7362699508667, |
|
"learning_rate": 0.00021428571428571427, |
|
"loss": 5.2423, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7105719237435009, |
|
"eval_loss": 0.9588707685470581, |
|
"eval_runtime": 17.8964, |
|
"eval_samples_per_second": 64.482, |
|
"eval_steps_per_second": 0.559, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"grad_norm": 11.099422454833984, |
|
"learning_rate": 0.0002571428571428571, |
|
"loss": 5.0251, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7279029462738301, |
|
"eval_loss": 0.908963680267334, |
|
"eval_runtime": 17.9404, |
|
"eval_samples_per_second": 64.324, |
|
"eval_steps_per_second": 0.557, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 16.643394470214844, |
|
"learning_rate": 0.0003, |
|
"loss": 5.0547, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7123050259965338, |
|
"eval_loss": 0.9352001547813416, |
|
"eval_runtime": 18.2788, |
|
"eval_samples_per_second": 63.133, |
|
"eval_steps_per_second": 0.547, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6993067590987868, |
|
"eval_loss": 1.0062916278839111, |
|
"eval_runtime": 17.9901, |
|
"eval_samples_per_second": 64.146, |
|
"eval_steps_per_second": 0.556, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 9.376890182495117, |
|
"learning_rate": 0.0002952380952380952, |
|
"loss": 4.8246, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7105719237435009, |
|
"eval_loss": 0.9190986752510071, |
|
"eval_runtime": 18.1793, |
|
"eval_samples_per_second": 63.479, |
|
"eval_steps_per_second": 0.55, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 12.857142857142858, |
|
"grad_norm": 7.629549026489258, |
|
"learning_rate": 0.00029047619047619045, |
|
"loss": 4.7811, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7123050259965338, |
|
"eval_loss": 0.9947251677513123, |
|
"eval_runtime": 17.9036, |
|
"eval_samples_per_second": 64.456, |
|
"eval_steps_per_second": 0.559, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7175043327556326, |
|
"eval_loss": 0.9671235084533691, |
|
"eval_runtime": 18.1306, |
|
"eval_samples_per_second": 63.649, |
|
"eval_steps_per_second": 0.552, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 13.771581649780273, |
|
"learning_rate": 0.0002857142857142857, |
|
"loss": 4.8234, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7235701906412478, |
|
"eval_loss": 0.9055125117301941, |
|
"eval_runtime": 18.3144, |
|
"eval_samples_per_second": 63.01, |
|
"eval_steps_per_second": 0.546, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 15.714285714285714, |
|
"grad_norm": 9.288651466369629, |
|
"learning_rate": 0.0002809523809523809, |
|
"loss": 4.4787, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.744367417677643, |
|
"eval_loss": 0.8837802410125732, |
|
"eval_runtime": 18.2071, |
|
"eval_samples_per_second": 63.382, |
|
"eval_steps_per_second": 0.549, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.729636048526863, |
|
"eval_loss": 0.9059325456619263, |
|
"eval_runtime": 18.0331, |
|
"eval_samples_per_second": 63.994, |
|
"eval_steps_per_second": 0.555, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 8.790782928466797, |
|
"learning_rate": 0.00027619047619047615, |
|
"loss": 4.39, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7461005199306759, |
|
"eval_loss": 0.8639523983001709, |
|
"eval_runtime": 18.0609, |
|
"eval_samples_per_second": 63.895, |
|
"eval_steps_per_second": 0.554, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 18.571428571428573, |
|
"grad_norm": 7.883941650390625, |
|
"learning_rate": 0.0002714285714285714, |
|
"loss": 4.1424, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7487001733102253, |
|
"eval_loss": 0.8660562634468079, |
|
"eval_runtime": 17.7478, |
|
"eval_samples_per_second": 65.022, |
|
"eval_steps_per_second": 0.563, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 21.828325271606445, |
|
"learning_rate": 0.0002666666666666666, |
|
"loss": 4.1065, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7305025996533796, |
|
"eval_loss": 0.9056758284568787, |
|
"eval_runtime": 17.8484, |
|
"eval_samples_per_second": 64.656, |
|
"eval_steps_per_second": 0.56, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7348353552859619, |
|
"eval_loss": 0.8865219354629517, |
|
"eval_runtime": 18.0329, |
|
"eval_samples_per_second": 63.994, |
|
"eval_steps_per_second": 0.555, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 21.428571428571427, |
|
"grad_norm": 7.540792465209961, |
|
"learning_rate": 0.00026190476190476186, |
|
"loss": 4.0844, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7391681109185442, |
|
"eval_loss": 0.8928019404411316, |
|
"eval_runtime": 17.9197, |
|
"eval_samples_per_second": 64.398, |
|
"eval_steps_per_second": 0.558, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"grad_norm": 14.240620613098145, |
|
"learning_rate": 0.0002571428571428571, |
|
"loss": 3.9835, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7538994800693241, |
|
"eval_loss": 0.8675404787063599, |
|
"eval_runtime": 18.0176, |
|
"eval_samples_per_second": 64.048, |
|
"eval_steps_per_second": 0.555, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.755632582322357, |
|
"eval_loss": 0.8828888535499573, |
|
"eval_runtime": 17.7466, |
|
"eval_samples_per_second": 65.027, |
|
"eval_steps_per_second": 0.563, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 24.285714285714285, |
|
"grad_norm": 8.749543190002441, |
|
"learning_rate": 0.0002523809523809524, |
|
"loss": 3.8199, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7616984402079723, |
|
"eval_loss": 0.8176947832107544, |
|
"eval_runtime": 17.983, |
|
"eval_samples_per_second": 64.172, |
|
"eval_steps_per_second": 0.556, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 25.714285714285715, |
|
"grad_norm": 9.475801467895508, |
|
"learning_rate": 0.00024761904761904757, |
|
"loss": 3.7898, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7461005199306759, |
|
"eval_loss": 0.8885547518730164, |
|
"eval_runtime": 18.0273, |
|
"eval_samples_per_second": 64.014, |
|
"eval_steps_per_second": 0.555, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7461005199306759, |
|
"eval_loss": 0.9394861459732056, |
|
"eval_runtime": 18.1419, |
|
"eval_samples_per_second": 63.61, |
|
"eval_steps_per_second": 0.551, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 27.142857142857142, |
|
"grad_norm": 7.944543361663818, |
|
"learning_rate": 0.00024285714285714283, |
|
"loss": 3.7734, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7608318890814558, |
|
"eval_loss": 0.8348239064216614, |
|
"eval_runtime": 17.9109, |
|
"eval_samples_per_second": 64.43, |
|
"eval_steps_per_second": 0.558, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 9.20173168182373, |
|
"learning_rate": 0.00023809523809523807, |
|
"loss": 3.7835, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.75736568457539, |
|
"eval_loss": 0.836903989315033, |
|
"eval_runtime": 18.1677, |
|
"eval_samples_per_second": 63.519, |
|
"eval_steps_per_second": 0.55, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 17.463150024414062, |
|
"learning_rate": 0.0002333333333333333, |
|
"loss": 3.6414, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7660311958405546, |
|
"eval_loss": 0.8668186664581299, |
|
"eval_runtime": 17.8247, |
|
"eval_samples_per_second": 64.742, |
|
"eval_steps_per_second": 0.561, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7599653379549394, |
|
"eval_loss": 0.8909233808517456, |
|
"eval_runtime": 18.1581, |
|
"eval_samples_per_second": 63.553, |
|
"eval_steps_per_second": 0.551, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 31.428571428571427, |
|
"grad_norm": 13.756216049194336, |
|
"learning_rate": 0.00022857142857142854, |
|
"loss": 3.5076, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7495667244367418, |
|
"eval_loss": 0.8795309066772461, |
|
"eval_runtime": 17.8514, |
|
"eval_samples_per_second": 64.645, |
|
"eval_steps_per_second": 0.56, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 32.857142857142854, |
|
"grad_norm": 9.03218936920166, |
|
"learning_rate": 0.0002238095238095238, |
|
"loss": 3.5447, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7538994800693241, |
|
"eval_loss": 0.9227800369262695, |
|
"eval_runtime": 17.9657, |
|
"eval_samples_per_second": 64.233, |
|
"eval_steps_per_second": 0.557, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7521663778162911, |
|
"eval_loss": 0.8850377798080444, |
|
"eval_runtime": 17.9906, |
|
"eval_samples_per_second": 64.144, |
|
"eval_steps_per_second": 0.556, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 34.285714285714285, |
|
"grad_norm": 7.675583839416504, |
|
"learning_rate": 0.000219047619047619, |
|
"loss": 3.5344, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7651646447140381, |
|
"eval_loss": 0.8584573864936829, |
|
"eval_runtime": 18.1255, |
|
"eval_samples_per_second": 63.667, |
|
"eval_steps_per_second": 0.552, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 35.714285714285715, |
|
"grad_norm": 7.848378658294678, |
|
"learning_rate": 0.00021428571428571427, |
|
"loss": 3.3678, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.75736568457539, |
|
"eval_loss": 0.8631114959716797, |
|
"eval_runtime": 18.0275, |
|
"eval_samples_per_second": 64.013, |
|
"eval_steps_per_second": 0.555, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.770363951473137, |
|
"eval_loss": 0.8675860166549683, |
|
"eval_runtime": 18.0196, |
|
"eval_samples_per_second": 64.042, |
|
"eval_steps_per_second": 0.555, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 37.142857142857146, |
|
"grad_norm": 9.06800651550293, |
|
"learning_rate": 0.00020952380952380948, |
|
"loss": 3.4061, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7616984402079723, |
|
"eval_loss": 0.9131080508232117, |
|
"eval_runtime": 17.9025, |
|
"eval_samples_per_second": 64.46, |
|
"eval_steps_per_second": 0.559, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 38.57142857142857, |
|
"grad_norm": 11.665525436401367, |
|
"learning_rate": 0.00020476190476190475, |
|
"loss": 3.3177, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7677642980935875, |
|
"eval_loss": 0.8631002902984619, |
|
"eval_runtime": 17.9771, |
|
"eval_samples_per_second": 64.193, |
|
"eval_steps_per_second": 0.556, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 15.023707389831543, |
|
"learning_rate": 0.00019999999999999998, |
|
"loss": 3.2767, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7642980935875217, |
|
"eval_loss": 0.8802210092544556, |
|
"eval_runtime": 17.9247, |
|
"eval_samples_per_second": 64.381, |
|
"eval_steps_per_second": 0.558, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7677642980935875, |
|
"eval_loss": 0.8518037796020508, |
|
"eval_runtime": 18.183, |
|
"eval_samples_per_second": 63.466, |
|
"eval_steps_per_second": 0.55, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 41.42857142857143, |
|
"grad_norm": 8.431020736694336, |
|
"learning_rate": 0.00019523809523809522, |
|
"loss": 3.1992, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.75736568457539, |
|
"eval_loss": 0.923156201839447, |
|
"eval_runtime": 18.0318, |
|
"eval_samples_per_second": 63.998, |
|
"eval_steps_per_second": 0.555, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 42.857142857142854, |
|
"grad_norm": 8.130815505981445, |
|
"learning_rate": 0.00019047619047619045, |
|
"loss": 3.2743, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7521663778162911, |
|
"eval_loss": 0.9305623173713684, |
|
"eval_runtime": 17.9901, |
|
"eval_samples_per_second": 64.146, |
|
"eval_steps_per_second": 0.556, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7755632582322357, |
|
"eval_loss": 0.8419708013534546, |
|
"eval_runtime": 17.9031, |
|
"eval_samples_per_second": 64.458, |
|
"eval_steps_per_second": 0.559, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 44.285714285714285, |
|
"grad_norm": 9.007019996643066, |
|
"learning_rate": 0.00018571428571428572, |
|
"loss": 3.1704, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7564991334488734, |
|
"eval_loss": 0.8801714777946472, |
|
"eval_runtime": 17.8984, |
|
"eval_samples_per_second": 64.475, |
|
"eval_steps_per_second": 0.559, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 45.714285714285715, |
|
"grad_norm": 8.079572677612305, |
|
"learning_rate": 0.00018095238095238093, |
|
"loss": 3.2466, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7677642980935875, |
|
"eval_loss": 0.878183901309967, |
|
"eval_runtime": 18.135, |
|
"eval_samples_per_second": 63.634, |
|
"eval_steps_per_second": 0.551, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7746967071057193, |
|
"eval_loss": 0.844364583492279, |
|
"eval_runtime": 18.003, |
|
"eval_samples_per_second": 64.1, |
|
"eval_steps_per_second": 0.555, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 47.142857142857146, |
|
"grad_norm": 6.920067310333252, |
|
"learning_rate": 0.0001761904761904762, |
|
"loss": 3.0879, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7694974003466204, |
|
"eval_loss": 0.8579216003417969, |
|
"eval_runtime": 17.8532, |
|
"eval_samples_per_second": 64.638, |
|
"eval_steps_per_second": 0.56, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 48.57142857142857, |
|
"grad_norm": 6.670530796051025, |
|
"learning_rate": 0.0001714285714285714, |
|
"loss": 3.1677, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7712305025996534, |
|
"eval_loss": 0.858402669429779, |
|
"eval_runtime": 17.75, |
|
"eval_samples_per_second": 65.014, |
|
"eval_steps_per_second": 0.563, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 13.106241226196289, |
|
"learning_rate": 0.00016666666666666666, |
|
"loss": 3.0965, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7755632582322357, |
|
"eval_loss": 0.8400810956954956, |
|
"eval_runtime": 18.0075, |
|
"eval_samples_per_second": 64.084, |
|
"eval_steps_per_second": 0.555, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7651646447140381, |
|
"eval_loss": 0.8724238872528076, |
|
"eval_runtime": 18.0097, |
|
"eval_samples_per_second": 64.077, |
|
"eval_steps_per_second": 0.555, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 51.42857142857143, |
|
"grad_norm": 8.85236930847168, |
|
"learning_rate": 0.00016190476190476187, |
|
"loss": 3.0611, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7807625649913345, |
|
"eval_loss": 0.8638470768928528, |
|
"eval_runtime": 18.0439, |
|
"eval_samples_per_second": 63.955, |
|
"eval_steps_per_second": 0.554, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 52.857142857142854, |
|
"grad_norm": 7.648194789886475, |
|
"learning_rate": 0.00015714285714285713, |
|
"loss": 3.0204, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.7660311958405546, |
|
"eval_loss": 0.9167099595069885, |
|
"eval_runtime": 17.9056, |
|
"eval_samples_per_second": 64.449, |
|
"eval_steps_per_second": 0.558, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7738301559792028, |
|
"eval_loss": 0.8322371244430542, |
|
"eval_runtime": 17.9741, |
|
"eval_samples_per_second": 64.204, |
|
"eval_steps_per_second": 0.556, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 54.285714285714285, |
|
"grad_norm": 6.742936611175537, |
|
"learning_rate": 0.00015238095238095237, |
|
"loss": 2.9704, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7642980935875217, |
|
"eval_loss": 0.8577215671539307, |
|
"eval_runtime": 18.0258, |
|
"eval_samples_per_second": 64.019, |
|
"eval_steps_per_second": 0.555, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 55.714285714285715, |
|
"grad_norm": 6.2735395431518555, |
|
"learning_rate": 0.0001476190476190476, |
|
"loss": 2.939, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7859618717504333, |
|
"eval_loss": 0.8296905755996704, |
|
"eval_runtime": 18.0649, |
|
"eval_samples_per_second": 63.881, |
|
"eval_steps_per_second": 0.554, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7686308492201039, |
|
"eval_loss": 0.874596893787384, |
|
"eval_runtime": 17.9658, |
|
"eval_samples_per_second": 64.233, |
|
"eval_steps_per_second": 0.557, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 57.142857142857146, |
|
"grad_norm": 6.44887113571167, |
|
"learning_rate": 0.00014285714285714284, |
|
"loss": 3.0341, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7824956672443674, |
|
"eval_loss": 0.8620171546936035, |
|
"eval_runtime": 17.939, |
|
"eval_samples_per_second": 64.329, |
|
"eval_steps_per_second": 0.557, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 58.57142857142857, |
|
"grad_norm": 6.199102401733398, |
|
"learning_rate": 0.00013809523809523808, |
|
"loss": 2.8997, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.75736568457539, |
|
"eval_loss": 0.8835130333900452, |
|
"eval_runtime": 18.2434, |
|
"eval_samples_per_second": 63.256, |
|
"eval_steps_per_second": 0.548, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 27.795392990112305, |
|
"learning_rate": 0.0001333333333333333, |
|
"loss": 3.0187, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7694974003466204, |
|
"eval_loss": 0.9018464684486389, |
|
"eval_runtime": 18.2513, |
|
"eval_samples_per_second": 63.228, |
|
"eval_steps_per_second": 0.548, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.7772963604852686, |
|
"eval_loss": 0.8939943909645081, |
|
"eval_runtime": 18.1365, |
|
"eval_samples_per_second": 63.629, |
|
"eval_steps_per_second": 0.551, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 61.42857142857143, |
|
"grad_norm": 10.215301513671875, |
|
"learning_rate": 0.00012857142857142855, |
|
"loss": 2.9316, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.7712305025996534, |
|
"eval_loss": 0.8858510851860046, |
|
"eval_runtime": 18.1655, |
|
"eval_samples_per_second": 63.527, |
|
"eval_steps_per_second": 0.55, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 62.857142857142854, |
|
"grad_norm": 5.105686187744141, |
|
"learning_rate": 0.00012380952380952378, |
|
"loss": 2.8746, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.7764298093587522, |
|
"eval_loss": 0.8661392331123352, |
|
"eval_runtime": 17.9626, |
|
"eval_samples_per_second": 64.245, |
|
"eval_steps_per_second": 0.557, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7712305025996534, |
|
"eval_loss": 0.8916440010070801, |
|
"eval_runtime": 17.94, |
|
"eval_samples_per_second": 64.326, |
|
"eval_steps_per_second": 0.557, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 64.28571428571429, |
|
"grad_norm": 9.268267631530762, |
|
"learning_rate": 0.00011904761904761903, |
|
"loss": 2.817, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.7781629116117851, |
|
"eval_loss": 0.8645418286323547, |
|
"eval_runtime": 18.2441, |
|
"eval_samples_per_second": 63.253, |
|
"eval_steps_per_second": 0.548, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 65.71428571428571, |
|
"grad_norm": 6.703152179718018, |
|
"learning_rate": 0.00011428571428571427, |
|
"loss": 2.7593, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.7686308492201039, |
|
"eval_loss": 0.8828719854354858, |
|
"eval_runtime": 18.1608, |
|
"eval_samples_per_second": 63.543, |
|
"eval_steps_per_second": 0.551, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.7790294627383015, |
|
"eval_loss": 0.8883015513420105, |
|
"eval_runtime": 18.1166, |
|
"eval_samples_per_second": 63.698, |
|
"eval_steps_per_second": 0.552, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 67.14285714285714, |
|
"grad_norm": 5.34393310546875, |
|
"learning_rate": 0.0001095238095238095, |
|
"loss": 2.9212, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7824956672443674, |
|
"eval_loss": 0.8507192134857178, |
|
"eval_runtime": 18.0504, |
|
"eval_samples_per_second": 63.932, |
|
"eval_steps_per_second": 0.554, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 68.57142857142857, |
|
"grad_norm": 6.5966668128967285, |
|
"learning_rate": 0.00010476190476190474, |
|
"loss": 2.8659, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.7876949740034662, |
|
"eval_loss": 0.8553578853607178, |
|
"eval_runtime": 18.0681, |
|
"eval_samples_per_second": 63.869, |
|
"eval_steps_per_second": 0.553, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 22.730220794677734, |
|
"learning_rate": 9.999999999999999e-05, |
|
"loss": 2.9068, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.7764298093587522, |
|
"eval_loss": 0.8812502026557922, |
|
"eval_runtime": 17.9671, |
|
"eval_samples_per_second": 64.229, |
|
"eval_steps_per_second": 0.557, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.7859618717504333, |
|
"eval_loss": 0.8555229902267456, |
|
"eval_runtime": 18.0711, |
|
"eval_samples_per_second": 63.859, |
|
"eval_steps_per_second": 0.553, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 71.42857142857143, |
|
"grad_norm": 5.773199558258057, |
|
"learning_rate": 9.523809523809523e-05, |
|
"loss": 2.8334, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.7790294627383015, |
|
"eval_loss": 0.8665823340415955, |
|
"eval_runtime": 18.4819, |
|
"eval_samples_per_second": 62.439, |
|
"eval_steps_per_second": 0.541, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 72.85714285714286, |
|
"grad_norm": 6.063803672790527, |
|
"learning_rate": 9.047619047619046e-05, |
|
"loss": 2.7322, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.7824956672443674, |
|
"eval_loss": 0.8682228922843933, |
|
"eval_runtime": 18.1239, |
|
"eval_samples_per_second": 63.673, |
|
"eval_steps_per_second": 0.552, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.7885615251299827, |
|
"eval_loss": 0.881618320941925, |
|
"eval_runtime": 17.8842, |
|
"eval_samples_per_second": 64.526, |
|
"eval_steps_per_second": 0.559, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 74.28571428571429, |
|
"grad_norm": 5.207172870635986, |
|
"learning_rate": 8.57142857142857e-05, |
|
"loss": 2.8548, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.7902946273830156, |
|
"eval_loss": 0.8523378968238831, |
|
"eval_runtime": 18.1134, |
|
"eval_samples_per_second": 63.71, |
|
"eval_steps_per_second": 0.552, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 75.71428571428571, |
|
"grad_norm": 6.294586658477783, |
|
"learning_rate": 8.095238095238093e-05, |
|
"loss": 2.8696, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7894280762564991, |
|
"eval_loss": 0.8509147763252258, |
|
"eval_runtime": 18.182, |
|
"eval_samples_per_second": 63.469, |
|
"eval_steps_per_second": 0.55, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.7807625649913345, |
|
"eval_loss": 0.8682960867881775, |
|
"eval_runtime": 18.3628, |
|
"eval_samples_per_second": 62.845, |
|
"eval_steps_per_second": 0.545, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 77.14285714285714, |
|
"grad_norm": 5.558056831359863, |
|
"learning_rate": 7.619047619047618e-05, |
|
"loss": 2.6439, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.7876949740034662, |
|
"eval_loss": 0.860653281211853, |
|
"eval_runtime": 18.2632, |
|
"eval_samples_per_second": 63.187, |
|
"eval_steps_per_second": 0.548, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 78.57142857142857, |
|
"grad_norm": 5.7894415855407715, |
|
"learning_rate": 7.142857142857142e-05, |
|
"loss": 2.9039, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.7842287694974004, |
|
"eval_loss": 0.8698387742042542, |
|
"eval_runtime": 18.1385, |
|
"eval_samples_per_second": 63.622, |
|
"eval_steps_per_second": 0.551, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 28.787755966186523, |
|
"learning_rate": 6.666666666666666e-05, |
|
"loss": 2.6338, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7876949740034662, |
|
"eval_loss": 0.8718376755714417, |
|
"eval_runtime": 18.0357, |
|
"eval_samples_per_second": 63.984, |
|
"eval_steps_per_second": 0.554, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.7902946273830156, |
|
"eval_loss": 0.8370843529701233, |
|
"eval_runtime": 18.1407, |
|
"eval_samples_per_second": 63.614, |
|
"eval_steps_per_second": 0.551, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 81.42857142857143, |
|
"grad_norm": 6.290432929992676, |
|
"learning_rate": 6.190476190476189e-05, |
|
"loss": 2.7271, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.792894280762565, |
|
"eval_loss": 0.8426641821861267, |
|
"eval_runtime": 17.8494, |
|
"eval_samples_per_second": 64.652, |
|
"eval_steps_per_second": 0.56, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 82.85714285714286, |
|
"grad_norm": 4.4193525314331055, |
|
"learning_rate": 5.7142857142857135e-05, |
|
"loss": 2.7555, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.7937608318890814, |
|
"eval_loss": 0.8621939420700073, |
|
"eval_runtime": 17.8242, |
|
"eval_samples_per_second": 64.743, |
|
"eval_steps_per_second": 0.561, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7859618717504333, |
|
"eval_loss": 0.8768612146377563, |
|
"eval_runtime": 17.9828, |
|
"eval_samples_per_second": 64.172, |
|
"eval_steps_per_second": 0.556, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 84.28571428571429, |
|
"grad_norm": 5.777393341064453, |
|
"learning_rate": 5.238095238095237e-05, |
|
"loss": 2.7702, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.7859618717504333, |
|
"eval_loss": 0.88438481092453, |
|
"eval_runtime": 17.8963, |
|
"eval_samples_per_second": 64.483, |
|
"eval_steps_per_second": 0.559, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 85.71428571428571, |
|
"grad_norm": 5.748138904571533, |
|
"learning_rate": 4.7619047619047614e-05, |
|
"loss": 2.8678, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.7824956672443674, |
|
"eval_loss": 0.8882182836532593, |
|
"eval_runtime": 17.8524, |
|
"eval_samples_per_second": 64.641, |
|
"eval_steps_per_second": 0.56, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.7824956672443674, |
|
"eval_loss": 0.8715818524360657, |
|
"eval_runtime": 17.8328, |
|
"eval_samples_per_second": 64.712, |
|
"eval_steps_per_second": 0.561, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 87.14285714285714, |
|
"grad_norm": 4.612086772918701, |
|
"learning_rate": 4.285714285714285e-05, |
|
"loss": 2.6334, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.7781629116117851, |
|
"eval_loss": 0.8782148361206055, |
|
"eval_runtime": 17.9213, |
|
"eval_samples_per_second": 64.393, |
|
"eval_steps_per_second": 0.558, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 88.57142857142857, |
|
"grad_norm": 6.36035680770874, |
|
"learning_rate": 3.809523809523809e-05, |
|
"loss": 2.7782, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.7807625649913345, |
|
"eval_loss": 0.8752433657646179, |
|
"eval_runtime": 18.042, |
|
"eval_samples_per_second": 63.962, |
|
"eval_steps_per_second": 0.554, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 6.581643581390381, |
|
"learning_rate": 3.333333333333333e-05, |
|
"loss": 2.5527, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.7807625649913345, |
|
"eval_loss": 0.8674911856651306, |
|
"eval_runtime": 17.811, |
|
"eval_samples_per_second": 64.791, |
|
"eval_steps_per_second": 0.561, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.7842287694974004, |
|
"eval_loss": 0.8734576106071472, |
|
"eval_runtime": 17.906, |
|
"eval_samples_per_second": 64.448, |
|
"eval_steps_per_second": 0.558, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 91.42857142857143, |
|
"grad_norm": 6.266481399536133, |
|
"learning_rate": 2.8571428571428567e-05, |
|
"loss": 2.6812, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.7885615251299827, |
|
"eval_loss": 0.8649889826774597, |
|
"eval_runtime": 18.1196, |
|
"eval_samples_per_second": 63.688, |
|
"eval_steps_per_second": 0.552, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 92.85714285714286, |
|
"grad_norm": 5.178635597229004, |
|
"learning_rate": 2.3809523809523807e-05, |
|
"loss": 2.6167, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.7946273830155979, |
|
"eval_loss": 0.8530935049057007, |
|
"eval_runtime": 17.8992, |
|
"eval_samples_per_second": 64.472, |
|
"eval_steps_per_second": 0.559, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.7868284228769498, |
|
"eval_loss": 0.8698766827583313, |
|
"eval_runtime": 17.9684, |
|
"eval_samples_per_second": 64.224, |
|
"eval_steps_per_second": 0.557, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 94.28571428571429, |
|
"grad_norm": 4.488171100616455, |
|
"learning_rate": 1.9047619047619046e-05, |
|
"loss": 2.6553, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.7894280762564991, |
|
"eval_loss": 0.8666642308235168, |
|
"eval_runtime": 17.9669, |
|
"eval_samples_per_second": 64.229, |
|
"eval_steps_per_second": 0.557, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 95.71428571428571, |
|
"grad_norm": 6.009092330932617, |
|
"learning_rate": 1.4285714285714284e-05, |
|
"loss": 2.7758, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.7920277296360485, |
|
"eval_loss": 0.8650416731834412, |
|
"eval_runtime": 18.0841, |
|
"eval_samples_per_second": 63.813, |
|
"eval_steps_per_second": 0.553, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.7902946273830156, |
|
"eval_loss": 0.8684815764427185, |
|
"eval_runtime": 17.8482, |
|
"eval_samples_per_second": 64.656, |
|
"eval_steps_per_second": 0.56, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 97.14285714285714, |
|
"grad_norm": 5.19600772857666, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 2.6592, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.7885615251299827, |
|
"eval_loss": 0.8592236042022705, |
|
"eval_runtime": 17.9065, |
|
"eval_samples_per_second": 64.446, |
|
"eval_steps_per_second": 0.558, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 98.57142857142857, |
|
"grad_norm": 5.676305770874023, |
|
"learning_rate": 4.7619047619047615e-06, |
|
"loss": 2.5202, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.7894280762564991, |
|
"eval_loss": 0.8744557499885559, |
|
"eval_runtime": 17.8619, |
|
"eval_samples_per_second": 64.607, |
|
"eval_steps_per_second": 0.56, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 48.86530685424805, |
|
"learning_rate": 0.0, |
|
"loss": 2.6577, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.7954939341421143, |
|
"eval_loss": 0.8635059595108032, |
|
"eval_runtime": 18.1084, |
|
"eval_samples_per_second": 63.727, |
|
"eval_steps_per_second": 0.552, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 700, |
|
"total_flos": 6.134894724962304e+18, |
|
"train_loss": 3.4289448138645717, |
|
"train_runtime": 8283.6476, |
|
"train_samples_per_second": 41.793, |
|
"train_steps_per_second": 0.085 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.134894724962304e+18, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|