|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2850, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.07566256044426542, |
|
"eval_loss": 4.71478796005249, |
|
"eval_runtime": 94.3673, |
|
"eval_samples_per_second": 12.049, |
|
"eval_steps_per_second": 0.763, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 0.7608898878097534, |
|
"learning_rate": 1.9840000000000003e-05, |
|
"loss": 5.6113, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 2.043915291200077e-09, |
|
"eval_loss": 1.6729369163513184, |
|
"eval_runtime": 334.9077, |
|
"eval_samples_per_second": 3.395, |
|
"eval_steps_per_second": 0.215, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.013493219939051442, |
|
"eval_loss": 1.6423040628433228, |
|
"eval_runtime": 326.9519, |
|
"eval_samples_per_second": 3.478, |
|
"eval_steps_per_second": 0.22, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"grad_norm": 0.5546914935112, |
|
"learning_rate": 1.5778723404255322e-05, |
|
"loss": 1.6436, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 26.712968804132256, |
|
"eval_loss": 1.6389009952545166, |
|
"eval_runtime": 129.4881, |
|
"eval_samples_per_second": 8.781, |
|
"eval_steps_per_second": 0.556, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 27.593238887604137, |
|
"eval_loss": 1.6381840705871582, |
|
"eval_runtime": 92.3845, |
|
"eval_samples_per_second": 12.307, |
|
"eval_steps_per_second": 0.779, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"grad_norm": 0.4653625190258026, |
|
"learning_rate": 1.152340425531915e-05, |
|
"loss": 1.5933, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 27.85219103460007, |
|
"eval_loss": 1.639172077178955, |
|
"eval_runtime": 94.3628, |
|
"eval_samples_per_second": 12.049, |
|
"eval_steps_per_second": 0.763, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 27.69478534477315, |
|
"eval_loss": 1.6408443450927734, |
|
"eval_runtime": 97.4176, |
|
"eval_samples_per_second": 11.671, |
|
"eval_steps_per_second": 0.739, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"grad_norm": 0.38791486620903015, |
|
"learning_rate": 7.268085106382979e-06, |
|
"loss": 1.5729, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 28.056102822986603, |
|
"eval_loss": 1.6421233415603638, |
|
"eval_runtime": 88.6352, |
|
"eval_samples_per_second": 12.828, |
|
"eval_steps_per_second": 0.812, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"grad_norm": 0.3788717985153198, |
|
"learning_rate": 3.012765957446809e-06, |
|
"loss": 1.5597, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 27.829370325397207, |
|
"eval_loss": 1.6420503854751587, |
|
"eval_runtime": 88.6233, |
|
"eval_samples_per_second": 12.83, |
|
"eval_steps_per_second": 0.812, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 28.125503003363978, |
|
"eval_loss": 1.6436502933502197, |
|
"eval_runtime": 91.7782, |
|
"eval_samples_per_second": 12.389, |
|
"eval_steps_per_second": 0.785, |
|
"step": 2850 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2850, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.463997932601344e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|