|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 625, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.338859558105469, |
|
"learning_rate": 2.9528747416929465e-06, |
|
"loss": 0.7159, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 0.6576318144798279, |
|
"eval_runtime": 12.5345, |
|
"eval_samples_per_second": 159.56, |
|
"eval_steps_per_second": 9.972, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 4.263615131378174, |
|
"learning_rate": 2.814460020065795e-06, |
|
"loss": 0.6551, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 0.6588318943977356, |
|
"eval_runtime": 12.1193, |
|
"eval_samples_per_second": 165.026, |
|
"eval_steps_per_second": 10.314, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.684665679931641, |
|
"learning_rate": 2.5934529411321173e-06, |
|
"loss": 0.6519, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.6581148505210876, |
|
"eval_runtime": 12.4709, |
|
"eval_samples_per_second": 160.373, |
|
"eval_steps_per_second": 10.023, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 4.372674942016602, |
|
"learning_rate": 2.303740192468495e-06, |
|
"loss": 0.6278, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 0.656774640083313, |
|
"eval_runtime": 12.0295, |
|
"eval_samples_per_second": 166.258, |
|
"eval_steps_per_second": 10.391, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.940370798110962, |
|
"learning_rate": 1.963525491562421e-06, |
|
"loss": 0.6394, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.6532722115516663, |
|
"eval_runtime": 12.0522, |
|
"eval_samples_per_second": 165.945, |
|
"eval_steps_per_second": 10.372, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 4.178117752075195, |
|
"learning_rate": 1.5941857792939703e-06, |
|
"loss": 0.6528, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.6502550840377808, |
|
"eval_runtime": 12.0395, |
|
"eval_samples_per_second": 166.12, |
|
"eval_steps_per_second": 10.383, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 3.7875773906707764, |
|
"learning_rate": 1.2189280281214128e-06, |
|
"loss": 0.6382, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 0.6453887820243835, |
|
"eval_runtime": 12.102, |
|
"eval_samples_per_second": 165.261, |
|
"eval_steps_per_second": 10.329, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.9947192668914795, |
|
"learning_rate": 8.613310626523911e-07, |
|
"loss": 0.638, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 0.6425909399986267, |
|
"eval_runtime": 12.0649, |
|
"eval_samples_per_second": 165.77, |
|
"eval_steps_per_second": 10.361, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 3.9734668731689453, |
|
"learning_rate": 5.438640153769653e-07, |
|
"loss": 0.618, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 0.6400230526924133, |
|
"eval_runtime": 12.0503, |
|
"eval_samples_per_second": 165.97, |
|
"eval_steps_per_second": 10.373, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.733959436416626, |
|
"learning_rate": 2.86474508437579e-07, |
|
"loss": 0.6378, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.6379128694534302, |
|
"eval_runtime": 12.1078, |
|
"eval_samples_per_second": 165.183, |
|
"eval_steps_per_second": 10.324, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 3.8779754638671875, |
|
"learning_rate": 1.0533527116762298e-07, |
|
"loss": 0.6338, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 0.636811375617981, |
|
"eval_runtime": 13.2199, |
|
"eval_samples_per_second": 151.287, |
|
"eval_steps_per_second": 9.455, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 3.924581527709961, |
|
"learning_rate": 1.1827948028283353e-08, |
|
"loss": 0.6284, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 0.6364374160766602, |
|
"eval_runtime": 14.3025, |
|
"eval_samples_per_second": 139.836, |
|
"eval_steps_per_second": 8.74, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 625, |
|
"total_flos": 9.391098276138189e+16, |
|
"train_loss": 0.6481949188232422, |
|
"train_runtime": 2478.7348, |
|
"train_samples_per_second": 4.034, |
|
"train_steps_per_second": 0.252 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 625, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.391098276138189e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|