{ "best_metric": 0.21295729279518127, "best_model_checkpoint": "./checkpoint/checkpoint-4500", "epoch": 1.8371855706923823, "eval_steps": 500, "global_step": 4500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.86, "eval_bleu": 44.0464, "eval_gen_len": 32.7836, "eval_loss": 8.00419807434082, "eval_runtime": 65.5675, "eval_samples_per_second": 15.434, "eval_steps_per_second": 0.229, "step": 3 }, { "epoch": 2.0, "eval_bleu": 43.9567, "eval_gen_len": 32.7362, "eval_loss": 7.68654727935791, "eval_runtime": 63.9145, "eval_samples_per_second": 15.834, "eval_steps_per_second": 0.235, "step": 7 }, { "epoch": 2.86, "eval_bleu": 44.0558, "eval_gen_len": 32.6759, "eval_loss": 7.480804443359375, "eval_runtime": 63.5089, "eval_samples_per_second": 15.935, "eval_steps_per_second": 0.236, "step": 10 }, { "epoch": 4.0, "eval_bleu": 44.0472, "eval_gen_len": 32.6146, "eval_loss": 7.244617938995361, "eval_runtime": 63.4492, "eval_samples_per_second": 15.95, "eval_steps_per_second": 0.236, "step": 14 }, { "epoch": 4.86, "eval_bleu": 44.0115, "eval_gen_len": 32.5652, "eval_loss": 7.082078456878662, "eval_runtime": 63.3306, "eval_samples_per_second": 15.98, "eval_steps_per_second": 0.237, "step": 17 }, { "epoch": 6.0, "eval_bleu": 44.0109, "eval_gen_len": 32.5375, "eval_loss": 6.889991760253906, "eval_runtime": 63.3773, "eval_samples_per_second": 15.968, "eval_steps_per_second": 0.237, "step": 21 }, { "epoch": 6.86, "eval_bleu": 43.9607, "eval_gen_len": 32.5188, "eval_loss": 6.759824752807617, "eval_runtime": 64.2688, "eval_samples_per_second": 15.746, "eval_steps_per_second": 0.233, "step": 24 }, { "epoch": 8.0, "eval_bleu": 43.953, "eval_gen_len": 32.5, "eval_loss": 6.606079578399658, "eval_runtime": 63.9028, "eval_samples_per_second": 15.837, "eval_steps_per_second": 0.235, "step": 28 }, { "epoch": 8.86, "eval_bleu": 44.0288, "eval_gen_len": 32.4664, "eval_loss": 6.503488063812256, "eval_runtime": 63.4627, "eval_samples_per_second": 15.946, "eval_steps_per_second": 0.236, "step": 31 }, { "epoch": 10.0, "eval_bleu": 44.0341, "eval_gen_len": 32.4911, "eval_loss": 6.378995895385742, "eval_runtime": 63.6355, "eval_samples_per_second": 15.903, "eval_steps_per_second": 0.236, "step": 35 }, { "epoch": 10.86, "eval_bleu": 43.9944, "eval_gen_len": 32.5089, "eval_loss": 6.294454574584961, "eval_runtime": 63.3078, "eval_samples_per_second": 15.985, "eval_steps_per_second": 0.237, "step": 38 }, { "epoch": 12.0, "eval_bleu": 43.9623, "eval_gen_len": 32.5188, "eval_loss": 6.190367698669434, "eval_runtime": 63.4377, "eval_samples_per_second": 15.953, "eval_steps_per_second": 0.236, "step": 42 }, { "epoch": 12.86, "eval_bleu": 43.8958, "eval_gen_len": 32.5395, "eval_loss": 6.120116233825684, "eval_runtime": 63.5351, "eval_samples_per_second": 15.928, "eval_steps_per_second": 0.236, "step": 45 }, { "epoch": 14.0, "eval_bleu": 43.8162, "eval_gen_len": 32.5682, "eval_loss": 6.033730506896973, "eval_runtime": 63.5785, "eval_samples_per_second": 15.917, "eval_steps_per_second": 0.236, "step": 49 }, { "epoch": 14.86, "eval_bleu": 43.7933, "eval_gen_len": 32.5919, "eval_loss": 5.97258996963501, "eval_runtime": 63.3511, "eval_samples_per_second": 15.974, "eval_steps_per_second": 0.237, "step": 52 }, { "epoch": 16.0, "eval_bleu": 43.6535, "eval_gen_len": 32.6186, "eval_loss": 5.896927356719971, "eval_runtime": 63.427, "eval_samples_per_second": 15.955, "eval_steps_per_second": 0.236, "step": 56 }, { "epoch": 0.2, "learning_rate": 1.591670069416088e-05, "loss": 3.3281, "step": 500 }, { "epoch": 0.2, "eval_bleu": 42.9368, "eval_gen_len": 33.085, "eval_loss": 0.957375168800354, "eval_runtime": 76.1811, "eval_samples_per_second": 13.284, "eval_steps_per_second": 0.42, "step": 500 }, { "epoch": 0.41, "learning_rate": 1.1833401388321765e-05, "loss": 0.2861, "step": 1000 }, { "epoch": 0.41, "eval_bleu": 43.7915, "eval_gen_len": 33.3686, "eval_loss": 0.21985910832881927, "eval_runtime": 77.3277, "eval_samples_per_second": 13.087, "eval_steps_per_second": 0.414, "step": 1000 }, { "epoch": 0.61, "learning_rate": 7.750102082482646e-06, "loss": 0.1388, "step": 1500 }, { "epoch": 0.61, "eval_bleu": 44.2777, "eval_gen_len": 33.0988, "eval_loss": 0.21508412063121796, "eval_runtime": 73.988, "eval_samples_per_second": 13.678, "eval_steps_per_second": 0.433, "step": 1500 }, { "epoch": 0.82, "learning_rate": 3.6668027766435283e-06, "loss": 0.1347, "step": 2000 }, { "epoch": 0.82, "eval_bleu": 44.1899, "eval_gen_len": 33.253, "eval_loss": 0.2146376669406891, "eval_runtime": 74.1113, "eval_samples_per_second": 13.655, "eval_steps_per_second": 0.432, "step": 2000 }, { "epoch": 1.02, "learning_rate": 9.791751735402207e-06, "loss": 0.1342, "step": 2500 }, { "epoch": 1.02, "eval_bleu": 44.5959, "eval_gen_len": 33.2036, "eval_loss": 0.21368788182735443, "eval_runtime": 197.7615, "eval_samples_per_second": 5.117, "eval_steps_per_second": 1.279, "step": 2500 }, { "epoch": 1.22, "learning_rate": 7.750102082482646e-06, "loss": 0.1299, "step": 3000 }, { "epoch": 1.22, "eval_bleu": 44.5261, "eval_gen_len": 33.2767, "eval_loss": 0.21383072435855865, "eval_runtime": 195.2598, "eval_samples_per_second": 5.183, "eval_steps_per_second": 1.296, "step": 3000 }, { "epoch": 1.43, "learning_rate": 5.708452429563087e-06, "loss": 0.1294, "step": 3500 }, { "epoch": 1.43, "eval_bleu": 44.5117, "eval_gen_len": 33.2757, "eval_loss": 0.21339072287082672, "eval_runtime": 189.6194, "eval_samples_per_second": 5.337, "eval_steps_per_second": 1.334, "step": 3500 }, { "epoch": 1.63, "learning_rate": 3.6668027766435283e-06, "loss": 0.1286, "step": 4000 }, { "epoch": 1.63, "eval_bleu": 44.4962, "eval_gen_len": 33.2717, "eval_loss": 0.2133340686559677, "eval_runtime": 188.3056, "eval_samples_per_second": 5.374, "eval_steps_per_second": 1.344, "step": 4000 }, { "epoch": 1.84, "learning_rate": 1.625153123723969e-06, "loss": 0.1283, "step": 4500 }, { "epoch": 1.84, "eval_bleu": 44.5259, "eval_gen_len": 33.2796, "eval_loss": 0.21295729279518127, "eval_runtime": 188.3984, "eval_samples_per_second": 5.372, "eval_steps_per_second": 1.343, "step": 4500 } ], "logging_steps": 500, "max_steps": 4898, "num_train_epochs": 2, "save_steps": 500, "total_flos": 3.168187737534628e+17, "trial_name": null, "trial_params": null }