|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.013036534889026497, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 6.518267444513249e-05, |
|
"eval_loss": 1.887902021408081, |
|
"eval_runtime": 333.4715, |
|
"eval_samples_per_second": 19.372, |
|
"eval_steps_per_second": 9.686, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006518267444513248, |
|
"grad_norm": 0.9258672595024109, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8293, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0013036534889026496, |
|
"grad_norm": 0.7830431461334229, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7838, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0019554802333539745, |
|
"grad_norm": 0.9389762282371521, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7733, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.002607306977805299, |
|
"grad_norm": 0.9967517852783203, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7201, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0032591337222566243, |
|
"grad_norm": 0.8355724215507507, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5985, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0032591337222566243, |
|
"eval_loss": 1.7376784086227417, |
|
"eval_runtime": 333.6711, |
|
"eval_samples_per_second": 19.36, |
|
"eval_steps_per_second": 9.68, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.003910960466707949, |
|
"grad_norm": 0.9053980708122253, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7509, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.004562787211159274, |
|
"grad_norm": 1.0709302425384521, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7415, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.005214613955610598, |
|
"grad_norm": 0.8847922682762146, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8675, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.005866440700061924, |
|
"grad_norm": 0.8659989237785339, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7278, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.006518267444513249, |
|
"grad_norm": 1.0513916015625, |
|
"learning_rate": 0.0002, |
|
"loss": 1.787, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.006518267444513249, |
|
"eval_loss": 1.726641058921814, |
|
"eval_runtime": 333.7788, |
|
"eval_samples_per_second": 19.354, |
|
"eval_steps_per_second": 9.677, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.007170094188964573, |
|
"grad_norm": 0.9510365128517151, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6355, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.007821920933415898, |
|
"grad_norm": 0.7684184312820435, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6816, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.008473747677867223, |
|
"grad_norm": 0.7511278390884399, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7392, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.009125574422318547, |
|
"grad_norm": 1.2092989683151245, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5957, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.009777401166769872, |
|
"grad_norm": 0.7998082041740417, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5553, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.009777401166769872, |
|
"eval_loss": 1.726781964302063, |
|
"eval_runtime": 333.6825, |
|
"eval_samples_per_second": 19.36, |
|
"eval_steps_per_second": 9.68, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.010429227911221197, |
|
"grad_norm": 1.0430855751037598, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7549, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.011081054655672521, |
|
"grad_norm": 1.1361061334609985, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6927, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.011732881400123848, |
|
"grad_norm": 0.9144191741943359, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7662, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.012384708144575172, |
|
"grad_norm": 0.8297296762466431, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7277, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.013036534889026497, |
|
"grad_norm": 1.0495644807815552, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7698, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.013036534889026497, |
|
"eval_loss": 1.7259982824325562, |
|
"eval_runtime": 333.5702, |
|
"eval_samples_per_second": 19.366, |
|
"eval_steps_per_second": 9.683, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.386991357034496e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|