|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.05116561670557385, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0025582808352786926, |
|
"grad_norm": 0.41173064708709717, |
|
"learning_rate": 0.00018, |
|
"loss": 2.2691, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005116561670557385, |
|
"grad_norm": 0.35278061032295227, |
|
"learning_rate": 0.00019889478706014687, |
|
"loss": 2.2137, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007674842505836078, |
|
"grad_norm": 0.32279932498931885, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 2.2253, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01023312334111477, |
|
"grad_norm": 0.32902857661247253, |
|
"learning_rate": 0.0001887222819443612, |
|
"loss": 2.2422, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.012791404176393463, |
|
"grad_norm": 0.4015531539916992, |
|
"learning_rate": 0.0001799187996894925, |
|
"loss": 2.1889, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015349685011672156, |
|
"grad_norm": 0.3013351857662201, |
|
"learning_rate": 0.0001689353409118566, |
|
"loss": 2.2778, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.017907965846950848, |
|
"grad_norm": 0.2108200341463089, |
|
"learning_rate": 0.0001560715057351673, |
|
"loss": 2.1985, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02046624668222954, |
|
"grad_norm": 0.44372543692588806, |
|
"learning_rate": 0.00014167818604952906, |
|
"loss": 1.9972, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.023024527517508234, |
|
"grad_norm": 0.36838993430137634, |
|
"learning_rate": 0.00012614799409538198, |
|
"loss": 2.17, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.025582808352786927, |
|
"grad_norm": 0.33015120029449463, |
|
"learning_rate": 0.0001099045530250463, |
|
"loss": 2.2036, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02814108918806562, |
|
"grad_norm": 0.22048693895339966, |
|
"learning_rate": 9.339094156743007e-05, |
|
"loss": 2.2489, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.030699370023344313, |
|
"grad_norm": 0.23766057193279266, |
|
"learning_rate": 7.705760799532485e-05, |
|
"loss": 1.8966, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.033257650858623006, |
|
"grad_norm": 0.24646614491939545, |
|
"learning_rate": 6.135008307075481e-05, |
|
"loss": 1.9245, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.035815931693901695, |
|
"grad_norm": 0.393329918384552, |
|
"learning_rate": 4.669682712720065e-05, |
|
"loss": 2.2395, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03837421252918039, |
|
"grad_norm": 0.23788638412952423, |
|
"learning_rate": 3.349754278861517e-05, |
|
"loss": 2.0628, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04093249336445908, |
|
"grad_norm": 0.5308476686477661, |
|
"learning_rate": 2.2112272123788768e-05, |
|
"loss": 2.2382, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04349077419973778, |
|
"grad_norm": 0.30160173773765564, |
|
"learning_rate": 1.2851575637272262e-05, |
|
"loss": 2.0997, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04604905503501647, |
|
"grad_norm": 0.418215274810791, |
|
"learning_rate": 5.968060988383883e-06, |
|
"loss": 2.0202, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.048607335870295164, |
|
"grad_norm": 0.37242060899734497, |
|
"learning_rate": 1.6494925127617634e-06, |
|
"loss": 2.2179, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05116561670557385, |
|
"grad_norm": 0.3666183352470398, |
|
"learning_rate": 1.3669500753099585e-08, |
|
"loss": 2.0904, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05116561670557385, |
|
"step": 200, |
|
"total_flos": 1.3791695468878234e+17, |
|
"train_loss": 2.1512529373168947, |
|
"train_runtime": 30852.7311, |
|
"train_samples_per_second": 0.052, |
|
"train_steps_per_second": 0.006 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3791695468878234e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|