|
{ |
|
"best_metric": 0.21295729279518127, |
|
"best_model_checkpoint": "./checkpoint/checkpoint-4500", |
|
"epoch": 1.8371855706923823, |
|
"eval_steps": 500, |
|
"global_step": 4500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.86, |
|
"eval_bleu": 44.0464, |
|
"eval_gen_len": 32.7836, |
|
"eval_loss": 8.00419807434082, |
|
"eval_runtime": 65.5675, |
|
"eval_samples_per_second": 15.434, |
|
"eval_steps_per_second": 0.229, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 43.9567, |
|
"eval_gen_len": 32.7362, |
|
"eval_loss": 7.68654727935791, |
|
"eval_runtime": 63.9145, |
|
"eval_samples_per_second": 15.834, |
|
"eval_steps_per_second": 0.235, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_bleu": 44.0558, |
|
"eval_gen_len": 32.6759, |
|
"eval_loss": 7.480804443359375, |
|
"eval_runtime": 63.5089, |
|
"eval_samples_per_second": 15.935, |
|
"eval_steps_per_second": 0.236, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 44.0472, |
|
"eval_gen_len": 32.6146, |
|
"eval_loss": 7.244617938995361, |
|
"eval_runtime": 63.4492, |
|
"eval_samples_per_second": 15.95, |
|
"eval_steps_per_second": 0.236, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"eval_bleu": 44.0115, |
|
"eval_gen_len": 32.5652, |
|
"eval_loss": 7.082078456878662, |
|
"eval_runtime": 63.3306, |
|
"eval_samples_per_second": 15.98, |
|
"eval_steps_per_second": 0.237, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 44.0109, |
|
"eval_gen_len": 32.5375, |
|
"eval_loss": 6.889991760253906, |
|
"eval_runtime": 63.3773, |
|
"eval_samples_per_second": 15.968, |
|
"eval_steps_per_second": 0.237, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"eval_bleu": 43.9607, |
|
"eval_gen_len": 32.5188, |
|
"eval_loss": 6.759824752807617, |
|
"eval_runtime": 64.2688, |
|
"eval_samples_per_second": 15.746, |
|
"eval_steps_per_second": 0.233, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 43.953, |
|
"eval_gen_len": 32.5, |
|
"eval_loss": 6.606079578399658, |
|
"eval_runtime": 63.9028, |
|
"eval_samples_per_second": 15.837, |
|
"eval_steps_per_second": 0.235, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_bleu": 44.0288, |
|
"eval_gen_len": 32.4664, |
|
"eval_loss": 6.503488063812256, |
|
"eval_runtime": 63.4627, |
|
"eval_samples_per_second": 15.946, |
|
"eval_steps_per_second": 0.236, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 44.0341, |
|
"eval_gen_len": 32.4911, |
|
"eval_loss": 6.378995895385742, |
|
"eval_runtime": 63.6355, |
|
"eval_samples_per_second": 15.903, |
|
"eval_steps_per_second": 0.236, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"eval_bleu": 43.9944, |
|
"eval_gen_len": 32.5089, |
|
"eval_loss": 6.294454574584961, |
|
"eval_runtime": 63.3078, |
|
"eval_samples_per_second": 15.985, |
|
"eval_steps_per_second": 0.237, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 43.9623, |
|
"eval_gen_len": 32.5188, |
|
"eval_loss": 6.190367698669434, |
|
"eval_runtime": 63.4377, |
|
"eval_samples_per_second": 15.953, |
|
"eval_steps_per_second": 0.236, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"eval_bleu": 43.8958, |
|
"eval_gen_len": 32.5395, |
|
"eval_loss": 6.120116233825684, |
|
"eval_runtime": 63.5351, |
|
"eval_samples_per_second": 15.928, |
|
"eval_steps_per_second": 0.236, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 43.8162, |
|
"eval_gen_len": 32.5682, |
|
"eval_loss": 6.033730506896973, |
|
"eval_runtime": 63.5785, |
|
"eval_samples_per_second": 15.917, |
|
"eval_steps_per_second": 0.236, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"eval_bleu": 43.7933, |
|
"eval_gen_len": 32.5919, |
|
"eval_loss": 5.97258996963501, |
|
"eval_runtime": 63.3511, |
|
"eval_samples_per_second": 15.974, |
|
"eval_steps_per_second": 0.237, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 43.6535, |
|
"eval_gen_len": 32.6186, |
|
"eval_loss": 5.896927356719971, |
|
"eval_runtime": 63.427, |
|
"eval_samples_per_second": 15.955, |
|
"eval_steps_per_second": 0.236, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.591670069416088e-05, |
|
"loss": 3.3281, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_bleu": 42.9368, |
|
"eval_gen_len": 33.085, |
|
"eval_loss": 0.957375168800354, |
|
"eval_runtime": 76.1811, |
|
"eval_samples_per_second": 13.284, |
|
"eval_steps_per_second": 0.42, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1833401388321765e-05, |
|
"loss": 0.2861, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_bleu": 43.7915, |
|
"eval_gen_len": 33.3686, |
|
"eval_loss": 0.21985910832881927, |
|
"eval_runtime": 77.3277, |
|
"eval_samples_per_second": 13.087, |
|
"eval_steps_per_second": 0.414, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.750102082482646e-06, |
|
"loss": 0.1388, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_bleu": 44.2777, |
|
"eval_gen_len": 33.0988, |
|
"eval_loss": 0.21508412063121796, |
|
"eval_runtime": 73.988, |
|
"eval_samples_per_second": 13.678, |
|
"eval_steps_per_second": 0.433, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6668027766435283e-06, |
|
"loss": 0.1347, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_bleu": 44.1899, |
|
"eval_gen_len": 33.253, |
|
"eval_loss": 0.2146376669406891, |
|
"eval_runtime": 74.1113, |
|
"eval_samples_per_second": 13.655, |
|
"eval_steps_per_second": 0.432, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 9.791751735402207e-06, |
|
"loss": 0.1342, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_bleu": 44.5959, |
|
"eval_gen_len": 33.2036, |
|
"eval_loss": 0.21368788182735443, |
|
"eval_runtime": 197.7615, |
|
"eval_samples_per_second": 5.117, |
|
"eval_steps_per_second": 1.279, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.750102082482646e-06, |
|
"loss": 0.1299, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_bleu": 44.5261, |
|
"eval_gen_len": 33.2767, |
|
"eval_loss": 0.21383072435855865, |
|
"eval_runtime": 195.2598, |
|
"eval_samples_per_second": 5.183, |
|
"eval_steps_per_second": 1.296, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.708452429563087e-06, |
|
"loss": 0.1294, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_bleu": 44.5117, |
|
"eval_gen_len": 33.2757, |
|
"eval_loss": 0.21339072287082672, |
|
"eval_runtime": 189.6194, |
|
"eval_samples_per_second": 5.337, |
|
"eval_steps_per_second": 1.334, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.6668027766435283e-06, |
|
"loss": 0.1286, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_bleu": 44.4962, |
|
"eval_gen_len": 33.2717, |
|
"eval_loss": 0.2133340686559677, |
|
"eval_runtime": 188.3056, |
|
"eval_samples_per_second": 5.374, |
|
"eval_steps_per_second": 1.344, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.625153123723969e-06, |
|
"loss": 0.1283, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_bleu": 44.5259, |
|
"eval_gen_len": 33.2796, |
|
"eval_loss": 0.21295729279518127, |
|
"eval_runtime": 188.3984, |
|
"eval_samples_per_second": 5.372, |
|
"eval_steps_per_second": 1.343, |
|
"step": 4500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4898, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 3.168187737534628e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|