{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 11.0,
  "global_step": 10329,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.53,
      "learning_rate": 5e-05,
      "loss": 4.5299,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_bleu1_score": 0.1103,
      "eval_bleu2_score": 0.0516,
      "eval_bleu3_score": 0.0302,
      "eval_bleu4_score": 0.019,
      "eval_loss": 3.4503543376922607,
      "eval_rougeL": 0.1058,
      "eval_runtime": 174.17,
      "eval_samples_per_second": 9.588,
      "eval_steps_per_second": 0.603,
      "step": 939
    },
    {
      "epoch": 1.06,
      "learning_rate": 4.745650625699461e-05,
      "loss": 3.6998,
      "step": 1000
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.4913012513989216e-05,
      "loss": 3.374,
      "step": 1500
    },
    {
      "epoch": 2.0,
      "eval_bleu1_score": 0.1251,
      "eval_bleu2_score": 0.0617,
      "eval_bleu3_score": 0.0349,
      "eval_bleu4_score": 0.0207,
      "eval_loss": 3.306809425354004,
      "eval_rougeL": 0.1227,
      "eval_runtime": 174.9295,
      "eval_samples_per_second": 9.547,
      "eval_steps_per_second": 0.6,
      "step": 1878
    },
    {
      "epoch": 2.13,
      "learning_rate": 4.2369518770983826e-05,
      "loss": 3.2617,
      "step": 2000
    },
    {
      "epoch": 2.66,
      "learning_rate": 3.982602502797843e-05,
      "loss": 2.9885,
      "step": 2500
    },
    {
      "epoch": 3.0,
      "eval_bleu1_score": 0.1377,
      "eval_bleu2_score": 0.0693,
      "eval_bleu3_score": 0.0386,
      "eval_bleu4_score": 0.0223,
      "eval_loss": 3.2763712406158447,
      "eval_rougeL": 0.1372,
      "eval_runtime": 174.431,
      "eval_samples_per_second": 9.574,
      "eval_steps_per_second": 0.602,
      "step": 2817
    },
    {
      "epoch": 3.19,
      "learning_rate": 3.7282531284973047e-05,
      "loss": 2.8648,
      "step": 3000
    },
    {
      "epoch": 3.73,
      "learning_rate": 3.473903754196765e-05,
      "loss": 2.6525,
      "step": 3500
    },
    {
      "epoch": 4.0,
      "eval_bleu1_score": 0.1382,
      "eval_bleu2_score": 0.073,
      "eval_bleu3_score": 0.0432,
      "eval_bleu4_score": 0.027,
      "eval_loss": 3.3136885166168213,
      "eval_rougeL": 0.1429,
      "eval_runtime": 173.1129,
      "eval_samples_per_second": 9.647,
      "eval_steps_per_second": 0.607,
      "step": 3756
    },
    {
      "epoch": 4.26,
      "learning_rate": 3.219554379896226e-05,
      "loss": 2.5232,
      "step": 4000
    },
    {
      "epoch": 4.79,
      "learning_rate": 2.9652050055956863e-05,
      "loss": 2.3911,
      "step": 4500
    },
    {
      "epoch": 5.0,
      "eval_bleu1_score": 0.1455,
      "eval_bleu2_score": 0.076,
      "eval_bleu3_score": 0.0451,
      "eval_bleu4_score": 0.0285,
      "eval_loss": 3.354043960571289,
      "eval_rougeL": 0.1431,
      "eval_runtime": 172.1208,
      "eval_samples_per_second": 9.702,
      "eval_steps_per_second": 0.61,
      "step": 4695
    },
    {
      "epoch": 5.32,
      "learning_rate": 2.710855631295147e-05,
      "loss": 2.2543,
      "step": 5000
    },
    {
      "epoch": 5.86,
      "learning_rate": 2.456506256994608e-05,
      "loss": 2.1732,
      "step": 5500
    },
    {
      "epoch": 6.0,
      "eval_bleu1_score": 0.1462,
      "eval_bleu2_score": 0.0784,
      "eval_bleu3_score": 0.0486,
      "eval_bleu4_score": 0.0326,
      "eval_loss": 3.422563314437866,
      "eval_rougeL": 0.1439,
      "eval_runtime": 172.488,
      "eval_samples_per_second": 9.682,
      "eval_steps_per_second": 0.609,
      "step": 5634
    },
    {
      "epoch": 6.39,
      "learning_rate": 2.2021568826940687e-05,
      "loss": 2.021,
      "step": 6000
    },
    {
      "epoch": 6.92,
      "learning_rate": 1.9478075083935294e-05,
      "loss": 1.966,
      "step": 6500
    },
    {
      "epoch": 7.0,
      "eval_bleu1_score": 0.1527,
      "eval_bleu2_score": 0.0836,
      "eval_bleu3_score": 0.0514,
      "eval_bleu4_score": 0.035,
      "eval_loss": 3.4865562915802,
      "eval_rougeL": 0.1446,
      "eval_runtime": 173.8607,
      "eval_samples_per_second": 9.605,
      "eval_steps_per_second": 0.604,
      "step": 6573
    },
    {
      "epoch": 7.45,
      "learning_rate": 1.6934581340929904e-05,
      "loss": 1.8372,
      "step": 7000
    },
    {
      "epoch": 7.99,
      "learning_rate": 1.4391087597924511e-05,
      "loss": 1.8303,
      "step": 7500
    },
    {
      "epoch": 8.0,
      "eval_bleu1_score": 0.1517,
      "eval_bleu2_score": 0.0854,
      "eval_bleu3_score": 0.0551,
      "eval_bleu4_score": 0.0387,
      "eval_loss": 3.54579758644104,
      "eval_rougeL": 0.1532,
      "eval_runtime": 172.6178,
      "eval_samples_per_second": 9.675,
      "eval_steps_per_second": 0.608,
      "step": 7512
    },
    {
      "epoch": 8.52,
      "learning_rate": 1.1847593854919118e-05,
      "loss": 1.6837,
      "step": 8000
    },
    {
      "epoch": 9.0,
      "eval_bleu1_score": 0.1575,
      "eval_bleu2_score": 0.0881,
      "eval_bleu3_score": 0.0575,
      "eval_bleu4_score": 0.0414,
      "eval_loss": 3.5976877212524414,
      "eval_rougeL": 0.1527,
      "eval_runtime": 173.4085,
      "eval_samples_per_second": 9.63,
      "eval_steps_per_second": 0.606,
      "step": 8451
    },
    {
      "epoch": 9.05,
      "learning_rate": 9.304100111913726e-06,
      "loss": 1.6791,
      "step": 8500
    },
    {
      "epoch": 9.58,
      "learning_rate": 6.760606368908333e-06,
      "loss": 1.591,
      "step": 9000
    },
    {
      "epoch": 10.0,
      "eval_bleu1_score": 0.1533,
      "eval_bleu2_score": 0.0887,
      "eval_bleu3_score": 0.0595,
      "eval_bleu4_score": 0.0437,
      "eval_loss": 3.6393702030181885,
      "eval_rougeL": 0.1505,
      "eval_runtime": 172.9918,
      "eval_samples_per_second": 9.654,
      "eval_steps_per_second": 0.607,
      "step": 9390
    },
    {
      "epoch": 10.12,
      "learning_rate": 4.217112625902941e-06,
      "loss": 1.5695,
      "step": 9500
    },
    {
      "epoch": 10.65,
      "learning_rate": 1.6736188828975481e-06,
      "loss": 1.5271,
      "step": 10000
    },
    {
      "epoch": 11.0,
      "eval_bleu1_score": 0.1599,
      "eval_bleu2_score": 0.0937,
      "eval_bleu3_score": 0.0638,
      "eval_bleu4_score": 0.0478,
      "eval_loss": 3.6564855575561523,
      "eval_rougeL": 0.1548,
      "eval_runtime": 172.8273,
      "eval_samples_per_second": 9.663,
      "eval_steps_per_second": 0.608,
      "step": 10329
    }
  ],
  "max_steps": 10329,
  "num_train_epochs": 11,
  "total_flos": 1.0063211828871168e+17,
  "trial_name": null,
  "trial_params": null
}