|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 1170, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.09853224281091519, |
|
"eval_loss": 7.545652866363525, |
|
"eval_runtime": 21.8676, |
|
"eval_samples_per_second": 4.161, |
|
"eval_steps_per_second": 0.274, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.12046292387193049, |
|
"eval_loss": 6.180108547210693, |
|
"eval_runtime": 22.0255, |
|
"eval_samples_per_second": 4.132, |
|
"eval_steps_per_second": 0.272, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.12654760063349843, |
|
"eval_loss": 4.648669719696045, |
|
"eval_runtime": 17.7486, |
|
"eval_samples_per_second": 5.127, |
|
"eval_steps_per_second": 0.338, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.029436101215112604, |
|
"eval_loss": 2.7668569087982178, |
|
"eval_runtime": 26.7718, |
|
"eval_samples_per_second": 3.399, |
|
"eval_steps_per_second": 0.224, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.0, |
|
"eval_loss": 1.7722187042236328, |
|
"eval_runtime": 29.3037, |
|
"eval_samples_per_second": 3.105, |
|
"eval_steps_per_second": 0.205, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.0, |
|
"eval_loss": 1.7318739891052246, |
|
"eval_runtime": 29.4055, |
|
"eval_samples_per_second": 3.095, |
|
"eval_steps_per_second": 0.204, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 6.410256410256411, |
|
"grad_norm": 0.5262104272842407, |
|
"learning_rate": 2.976e-05, |
|
"loss": 4.7993, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.0, |
|
"eval_loss": 1.7198480367660522, |
|
"eval_runtime": 29.3889, |
|
"eval_samples_per_second": 3.096, |
|
"eval_steps_per_second": 0.204, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.0, |
|
"eval_loss": 1.7133270502090454, |
|
"eval_runtime": 29.3225, |
|
"eval_samples_per_second": 3.103, |
|
"eval_steps_per_second": 0.205, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 4.084254520071281e-12, |
|
"eval_loss": 1.712929368019104, |
|
"eval_runtime": 29.1381, |
|
"eval_samples_per_second": 3.123, |
|
"eval_steps_per_second": 0.206, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 35.20367031086142, |
|
"eval_loss": 1.7163715362548828, |
|
"eval_runtime": 23.0062, |
|
"eval_samples_per_second": 3.955, |
|
"eval_steps_per_second": 0.261, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 35.98950764336343, |
|
"eval_loss": 1.7173269987106323, |
|
"eval_runtime": 22.0054, |
|
"eval_samples_per_second": 4.135, |
|
"eval_steps_per_second": 0.273, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 37.82081281422519, |
|
"eval_loss": 1.719827651977539, |
|
"eval_runtime": 21.4905, |
|
"eval_samples_per_second": 4.234, |
|
"eval_steps_per_second": 0.279, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 12.820512820512821, |
|
"grad_norm": 0.21546132862567902, |
|
"learning_rate": 7.791044776119404e-06, |
|
"loss": 1.574, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 36.5983422231339, |
|
"eval_loss": 1.7211079597473145, |
|
"eval_runtime": 21.4628, |
|
"eval_samples_per_second": 4.24, |
|
"eval_steps_per_second": 0.28, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 36.61469737920473, |
|
"eval_loss": 1.721977710723877, |
|
"eval_runtime": 21.6237, |
|
"eval_samples_per_second": 4.208, |
|
"eval_steps_per_second": 0.277, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 37.37984492445374, |
|
"eval_loss": 1.7229710817337036, |
|
"eval_runtime": 21.724, |
|
"eval_samples_per_second": 4.189, |
|
"eval_steps_per_second": 0.276, |
|
"step": 1170 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1170, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.009328968433664e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|