{ "best_metric": null, "best_model_checkpoint": null, "epoch": 11.0, "global_step": 10329, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.53, "learning_rate": 5e-05, "loss": 4.5299, "step": 500 }, { "epoch": 1.0, "eval_bleu1_score": 0.1103, "eval_bleu2_score": 0.0516, "eval_bleu3_score": 0.0302, "eval_bleu4_score": 0.019, "eval_loss": 3.4503543376922607, "eval_rougeL": 0.1058, "eval_runtime": 174.17, "eval_samples_per_second": 9.588, "eval_steps_per_second": 0.603, "step": 939 }, { "epoch": 1.06, "learning_rate": 4.745650625699461e-05, "loss": 3.6998, "step": 1000 }, { "epoch": 1.6, "learning_rate": 4.4913012513989216e-05, "loss": 3.374, "step": 1500 }, { "epoch": 2.0, "eval_bleu1_score": 0.1251, "eval_bleu2_score": 0.0617, "eval_bleu3_score": 0.0349, "eval_bleu4_score": 0.0207, "eval_loss": 3.306809425354004, "eval_rougeL": 0.1227, "eval_runtime": 174.9295, "eval_samples_per_second": 9.547, "eval_steps_per_second": 0.6, "step": 1878 }, { "epoch": 2.13, "learning_rate": 4.2369518770983826e-05, "loss": 3.2617, "step": 2000 }, { "epoch": 2.66, "learning_rate": 3.982602502797843e-05, "loss": 2.9885, "step": 2500 }, { "epoch": 3.0, "eval_bleu1_score": 0.1377, "eval_bleu2_score": 0.0693, "eval_bleu3_score": 0.0386, "eval_bleu4_score": 0.0223, "eval_loss": 3.2763712406158447, "eval_rougeL": 0.1372, "eval_runtime": 174.431, "eval_samples_per_second": 9.574, "eval_steps_per_second": 0.602, "step": 2817 }, { "epoch": 3.19, "learning_rate": 3.7282531284973047e-05, "loss": 2.8648, "step": 3000 }, { "epoch": 3.73, "learning_rate": 3.473903754196765e-05, "loss": 2.6525, "step": 3500 }, { "epoch": 4.0, "eval_bleu1_score": 0.1382, "eval_bleu2_score": 0.073, "eval_bleu3_score": 0.0432, "eval_bleu4_score": 0.027, "eval_loss": 3.3136885166168213, "eval_rougeL": 0.1429, "eval_runtime": 173.1129, "eval_samples_per_second": 9.647, "eval_steps_per_second": 0.607, "step": 3756 }, { "epoch": 4.26, "learning_rate": 3.219554379896226e-05, "loss": 2.5232, "step": 4000 }, { "epoch": 4.79, "learning_rate": 2.9652050055956863e-05, "loss": 2.3911, "step": 4500 }, { "epoch": 5.0, "eval_bleu1_score": 0.1455, "eval_bleu2_score": 0.076, "eval_bleu3_score": 0.0451, "eval_bleu4_score": 0.0285, "eval_loss": 3.354043960571289, "eval_rougeL": 0.1431, "eval_runtime": 172.1208, "eval_samples_per_second": 9.702, "eval_steps_per_second": 0.61, "step": 4695 }, { "epoch": 5.32, "learning_rate": 2.710855631295147e-05, "loss": 2.2543, "step": 5000 }, { "epoch": 5.86, "learning_rate": 2.456506256994608e-05, "loss": 2.1732, "step": 5500 }, { "epoch": 6.0, "eval_bleu1_score": 0.1462, "eval_bleu2_score": 0.0784, "eval_bleu3_score": 0.0486, "eval_bleu4_score": 0.0326, "eval_loss": 3.422563314437866, "eval_rougeL": 0.1439, "eval_runtime": 172.488, "eval_samples_per_second": 9.682, "eval_steps_per_second": 0.609, "step": 5634 }, { "epoch": 6.39, "learning_rate": 2.2021568826940687e-05, "loss": 2.021, "step": 6000 }, { "epoch": 6.92, "learning_rate": 1.9478075083935294e-05, "loss": 1.966, "step": 6500 }, { "epoch": 7.0, "eval_bleu1_score": 0.1527, "eval_bleu2_score": 0.0836, "eval_bleu3_score": 0.0514, "eval_bleu4_score": 0.035, "eval_loss": 3.4865562915802, "eval_rougeL": 0.1446, "eval_runtime": 173.8607, "eval_samples_per_second": 9.605, "eval_steps_per_second": 0.604, "step": 6573 }, { "epoch": 7.45, "learning_rate": 1.6934581340929904e-05, "loss": 1.8372, "step": 7000 }, { "epoch": 7.99, "learning_rate": 1.4391087597924511e-05, "loss": 1.8303, "step": 7500 }, { "epoch": 8.0, "eval_bleu1_score": 0.1517, "eval_bleu2_score": 0.0854, "eval_bleu3_score": 0.0551, "eval_bleu4_score": 0.0387, "eval_loss": 3.54579758644104, "eval_rougeL": 0.1532, "eval_runtime": 172.6178, "eval_samples_per_second": 9.675, "eval_steps_per_second": 0.608, "step": 7512 }, { "epoch": 8.52, "learning_rate": 1.1847593854919118e-05, "loss": 1.6837, "step": 8000 }, { "epoch": 9.0, "eval_bleu1_score": 0.1575, "eval_bleu2_score": 0.0881, "eval_bleu3_score": 0.0575, "eval_bleu4_score": 0.0414, "eval_loss": 3.5976877212524414, "eval_rougeL": 0.1527, "eval_runtime": 173.4085, "eval_samples_per_second": 9.63, "eval_steps_per_second": 0.606, "step": 8451 }, { "epoch": 9.05, "learning_rate": 9.304100111913726e-06, "loss": 1.6791, "step": 8500 }, { "epoch": 9.58, "learning_rate": 6.760606368908333e-06, "loss": 1.591, "step": 9000 }, { "epoch": 10.0, "eval_bleu1_score": 0.1533, "eval_bleu2_score": 0.0887, "eval_bleu3_score": 0.0595, "eval_bleu4_score": 0.0437, "eval_loss": 3.6393702030181885, "eval_rougeL": 0.1505, "eval_runtime": 172.9918, "eval_samples_per_second": 9.654, "eval_steps_per_second": 0.607, "step": 9390 }, { "epoch": 10.12, "learning_rate": 4.217112625902941e-06, "loss": 1.5695, "step": 9500 }, { "epoch": 10.65, "learning_rate": 1.6736188828975481e-06, "loss": 1.5271, "step": 10000 }, { "epoch": 11.0, "eval_bleu1_score": 0.1599, "eval_bleu2_score": 0.0937, "eval_bleu3_score": 0.0638, "eval_bleu4_score": 0.0478, "eval_loss": 3.6564855575561523, "eval_rougeL": 0.1548, "eval_runtime": 172.8273, "eval_samples_per_second": 9.663, "eval_steps_per_second": 0.608, "step": 10329 } ], "max_steps": 10329, "num_train_epochs": 11, "total_flos": 1.0063211828871168e+17, "trial_name": null, "trial_params": null }