|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9952, |
|
"eval_steps": 500, |
|
"global_step": 468, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 1.5544959976587758, |
|
"learning_rate": 4.94947663010567e-06, |
|
"loss": 1.2661, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 1.58877277838815, |
|
"learning_rate": 4.799948609147061e-06, |
|
"loss": 1.1198, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 1.4076735500904218, |
|
"learning_rate": 4.5574596647341414e-06, |
|
"loss": 1.1018, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 1.4296012725480793, |
|
"learning_rate": 4.231810883773999e-06, |
|
"loss": 1.1007, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.405680058916635, |
|
"learning_rate": 3.836164565319503e-06, |
|
"loss": 1.0833, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.152, |
|
"grad_norm": 1.3339483109482262, |
|
"learning_rate": 3.386512217606339e-06, |
|
"loss": 0.9834, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.3439999999999999, |
|
"grad_norm": 1.3288150935651342, |
|
"learning_rate": 2.9010282021444008e-06, |
|
"loss": 0.9506, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.536, |
|
"grad_norm": 1.356933287143514, |
|
"learning_rate": 2.399335149726463e-06, |
|
"loss": 0.9435, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.728, |
|
"grad_norm": 1.3070430897157126, |
|
"learning_rate": 1.9017108392811065e-06, |
|
"loss": 0.9443, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.3825627014048851, |
|
"learning_rate": 1.4282685964923643e-06, |
|
"loss": 0.9447, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.112, |
|
"grad_norm": 1.4058361561032482, |
|
"learning_rate": 9.981443394050525e-07, |
|
"loss": 0.8923, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.304, |
|
"grad_norm": 1.3575000714838341, |
|
"learning_rate": 6.28723129572247e-07, |
|
"loss": 0.8429, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.496, |
|
"grad_norm": 1.3763689831257202, |
|
"learning_rate": 3.3493649053890325e-07, |
|
"loss": 0.8478, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.6879999999999997, |
|
"grad_norm": 1.4156917358264154, |
|
"learning_rate": 1.286588951321363e-07, |
|
"loss": 0.8463, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 1.3846493525562265, |
|
"learning_rate": 1.822781475486507e-08, |
|
"loss": 0.8508, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.9952, |
|
"step": 468, |
|
"total_flos": 80495178678272.0, |
|
"train_loss": 0.9764440487592648, |
|
"train_runtime": 2345.6045, |
|
"train_samples_per_second": 25.58, |
|
"train_steps_per_second": 0.2 |
|
} |
|
], |
|
"logging_steps": 30, |
|
"max_steps": 468, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 80495178678272.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|