|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9884169884169884, |
|
"eval_steps": 100, |
|
"global_step": 32, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 287.9553693771362, |
|
"epoch": 0.15444015444015444, |
|
"grad_norm": 712.992919921875, |
|
"kl": 3.120312976837158, |
|
"learning_rate": 1.9937122098932428e-05, |
|
"loss": 0.1248, |
|
"reward": 0.6392857432365417, |
|
"reward_std": 0.24748737085610628, |
|
"rewards/accuracy_reward": 0.11250000586733222, |
|
"rewards/format_reward": 0.5267857377417385, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 76.98839626312255, |
|
"epoch": 0.3088803088803089, |
|
"grad_norm": 0.6054267287254333, |
|
"kl": 0.269256591796875, |
|
"learning_rate": 1.78183148246803e-05, |
|
"loss": 0.0108, |
|
"reward": 1.0053571820259095, |
|
"reward_std": 0.13131982944905757, |
|
"rewards/accuracy_reward": 0.058928574342280625, |
|
"rewards/format_reward": 0.946428595483303, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 53.2321453332901, |
|
"epoch": 0.46332046332046334, |
|
"grad_norm": 0.6564269661903381, |
|
"kl": 0.46717529296875, |
|
"learning_rate": 1.3302790619551673e-05, |
|
"loss": 0.0187, |
|
"reward": 1.0392857573926448, |
|
"reward_std": 0.13637059200555085, |
|
"rewards/accuracy_reward": 0.09196429075673222, |
|
"rewards/format_reward": 0.9473214484751225, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 94.31786141395568, |
|
"epoch": 0.6177606177606177, |
|
"grad_norm": 0.3075297772884369, |
|
"kl": 0.252783203125, |
|
"learning_rate": 7.774790660436857e-06, |
|
"loss": 0.0101, |
|
"reward": 1.0741071864962577, |
|
"reward_std": 0.13005714006721975, |
|
"rewards/accuracy_reward": 0.10446429094299674, |
|
"rewards/format_reward": 0.9696428716182709, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 132.22054238319396, |
|
"epoch": 0.7722007722007722, |
|
"grad_norm": 0.4520126283168793, |
|
"kl": 0.225750732421875, |
|
"learning_rate": 2.9289321881345257e-06, |
|
"loss": 0.009, |
|
"reward": 1.0294643245637416, |
|
"reward_std": 0.17046323977410793, |
|
"rewards/accuracy_reward": 0.09375000428408384, |
|
"rewards/format_reward": 0.935714315623045, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 143.1794707775116, |
|
"epoch": 0.9266409266409267, |
|
"grad_norm": 0.4325202405452728, |
|
"kl": 0.220745849609375, |
|
"learning_rate": 2.507208781817638e-07, |
|
"loss": 0.0088, |
|
"reward": 1.0116071835160256, |
|
"reward_std": 0.16036171615123748, |
|
"rewards/accuracy_reward": 0.08125000409781932, |
|
"rewards/format_reward": 0.9303571656346321, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 134.81697022914886, |
|
"epoch": 0.9884169884169884, |
|
"kl": 0.248382568359375, |
|
"reward": 1.020089328289032, |
|
"reward_std": 0.12311234045773745, |
|
"rewards/accuracy_reward": 0.07589286053553224, |
|
"rewards/format_reward": 0.944196455180645, |
|
"step": 32, |
|
"total_flos": 0.0, |
|
"train_loss": 0.029145897831767797, |
|
"train_runtime": 1931.8778, |
|
"train_samples_per_second": 1.875, |
|
"train_steps_per_second": 0.017 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 32, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|