|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3401360544217687, |
|
"eval_steps": 10, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006802721088435374, |
|
"eval_loss": 7.88132381439209, |
|
"eval_runtime": 37.6145, |
|
"eval_samples_per_second": 26.32, |
|
"eval_steps_per_second": 3.297, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06802721088435375, |
|
"grad_norm": 5.561442852020264, |
|
"learning_rate": 9.330127018922194e-05, |
|
"loss": 6.6527, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06802721088435375, |
|
"eval_loss": 3.9917244911193848, |
|
"eval_runtime": 36.513, |
|
"eval_samples_per_second": 27.114, |
|
"eval_steps_per_second": 3.396, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 2.8865180015563965, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 3.2544, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"eval_loss": 2.308483123779297, |
|
"eval_runtime": 36.6555, |
|
"eval_samples_per_second": 27.008, |
|
"eval_steps_per_second": 3.383, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 2.2314932346343994, |
|
"learning_rate": 3.705904774487396e-05, |
|
"loss": 1.9778, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"eval_loss": 1.6901850700378418, |
|
"eval_runtime": 36.1306, |
|
"eval_samples_per_second": 27.401, |
|
"eval_steps_per_second": 3.432, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 2.5240602493286133, |
|
"learning_rate": 1.0332332985438248e-05, |
|
"loss": 1.447, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"eval_loss": 1.3286805152893066, |
|
"eval_runtime": 36.3788, |
|
"eval_samples_per_second": 27.214, |
|
"eval_steps_per_second": 3.409, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 2.9974277019500732, |
|
"learning_rate": 0.0, |
|
"loss": 1.2756, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"eval_loss": 1.2556947469711304, |
|
"eval_runtime": 36.4335, |
|
"eval_samples_per_second": 27.173, |
|
"eval_steps_per_second": 3.403, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.02662300696576e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|