|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.11025358324145534, |
|
"eval_steps": 10, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002205071664829107, |
|
"eval_loss": 2.177356719970703, |
|
"eval_runtime": 9.5836, |
|
"eval_samples_per_second": 19.93, |
|
"eval_steps_per_second": 10.017, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.011025358324145534, |
|
"grad_norm": 3.0545544624328613, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1943, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.022050716648291068, |
|
"grad_norm": 2.2579193115234375, |
|
"learning_rate": 0.0001, |
|
"loss": 2.2168, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.022050716648291068, |
|
"eval_loss": 1.7749028205871582, |
|
"eval_runtime": 9.6049, |
|
"eval_samples_per_second": 19.886, |
|
"eval_steps_per_second": 9.995, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03307607497243661, |
|
"grad_norm": 2.798457622528076, |
|
"learning_rate": 9.619397662556435e-05, |
|
"loss": 1.7471, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.044101433296582136, |
|
"grad_norm": 2.5906262397766113, |
|
"learning_rate": 8.535533905932738e-05, |
|
"loss": 1.5741, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.044101433296582136, |
|
"eval_loss": 1.4727933406829834, |
|
"eval_runtime": 9.6402, |
|
"eval_samples_per_second": 19.813, |
|
"eval_steps_per_second": 9.958, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05512679162072767, |
|
"grad_norm": 2.3336644172668457, |
|
"learning_rate": 6.91341716182545e-05, |
|
"loss": 1.4489, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06615214994487321, |
|
"grad_norm": 2.168654680252075, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4988, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06615214994487321, |
|
"eval_loss": 1.4284926652908325, |
|
"eval_runtime": 9.9918, |
|
"eval_samples_per_second": 19.116, |
|
"eval_steps_per_second": 9.608, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07717750826901874, |
|
"grad_norm": 2.2377634048461914, |
|
"learning_rate": 3.086582838174551e-05, |
|
"loss": 1.3625, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08820286659316427, |
|
"grad_norm": 2.3646817207336426, |
|
"learning_rate": 1.4644660940672627e-05, |
|
"loss": 1.401, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08820286659316427, |
|
"eval_loss": 1.4094713926315308, |
|
"eval_runtime": 9.8204, |
|
"eval_samples_per_second": 19.449, |
|
"eval_steps_per_second": 9.776, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09922822491730982, |
|
"grad_norm": 2.390453577041626, |
|
"learning_rate": 3.8060233744356633e-06, |
|
"loss": 1.5494, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11025358324145534, |
|
"grad_norm": 2.1996264457702637, |
|
"learning_rate": 0.0, |
|
"loss": 1.3396, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11025358324145534, |
|
"eval_loss": 1.405466914176941, |
|
"eval_runtime": 9.8045, |
|
"eval_samples_per_second": 19.481, |
|
"eval_steps_per_second": 9.791, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2500486653542400.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|