|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9893390191897654,
  "eval_steps": 100,
  "global_step": 58,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "completion_length": 606.2089546203613,
      "epoch": 0.08528784648187633,
      "grad_norm": 0.29036083817481995,
      "kl": 0.00022020339965820313,
      "learning_rate": 2.5e-06,
      "loss": 0.0464,
      "reward": 0.6397321701049805,
      "reward_std": 0.3235082272440195,
      "rewards/accuracy_reward": 0.6390625275671482,
      "rewards/format_reward": 0.0006696428870782256,
      "step": 5
    },
    {
      "completion_length": 612.3254737854004,
      "epoch": 0.17057569296375266,
      "grad_norm": 2.9889135360717773,
      "kl": 0.7559915542602539,
      "learning_rate": 2.956412726139078e-06,
      "loss": 0.111,
      "reward": 0.696651816368103,
      "reward_std": 0.2815938711166382,
      "rewards/accuracy_reward": 0.6962053865194321,
      "rewards/format_reward": 0.00044642859138548373,
      "step": 10
    },
    {
      "completion_length": 588.2087371826171,
      "epoch": 0.255863539445629,
      "grad_norm": 58.945621490478516,
      "kl": 0.013921737670898438,
      "learning_rate": 2.7836719084521715e-06,
      "loss": 0.0668,
      "reward": 0.7671875312924386,
      "reward_std": 0.21175616141408682,
      "rewards/accuracy_reward": 0.7671875312924386,
      "rewards/format_reward": 0.0,
      "step": 15
    },
    {
      "completion_length": 577.985066986084,
      "epoch": 0.3411513859275053,
      "grad_norm": 8.070592880249023,
      "kl": 23.920117950439455,
      "learning_rate": 2.4946839873611927e-06,
      "loss": 2.1306,
      "reward": 0.7787946745753288,
      "reward_std": 0.18835734333842993,
      "rewards/accuracy_reward": 0.7787946745753288,
      "rewards/format_reward": 0.0,
      "step": 20
    },
    {
      "completion_length": 589.9038261413574,
      "epoch": 0.42643923240938164,
      "grad_norm": 12.532156944274902,
      "kl": 0.11749801635742188,
      "learning_rate": 2.1156192081791355e-06,
      "loss": 0.0537,
      "reward": 0.7645089641213417,
      "reward_std": 0.1823441507294774,
      "rewards/accuracy_reward": 0.7645089641213417,
      "rewards/format_reward": 0.0,
      "step": 25
    },
    {
      "completion_length": 597.8422134399414,
      "epoch": 0.511727078891258,
      "grad_norm": 338.9886169433594,
      "kl": 0.22518844604492189,
      "learning_rate": 1.6808050203829845e-06,
      "loss": 0.0479,
      "reward": 0.7513393253087998,
      "reward_std": 0.17386628594249487,
      "rewards/accuracy_reward": 0.7513393253087998,
      "rewards/format_reward": 0.0,
      "step": 30
    },
    {
      "completion_length": 591.2631973266601,
      "epoch": 0.5970149253731343,
      "grad_norm": 10.277711868286133,
      "kl": 0.11962966918945313,
      "learning_rate": 1.2296174432791415e-06,
      "loss": 0.0388,
      "reward": 0.744419677555561,
      "reward_std": 0.16556358551606537,
      "rewards/accuracy_reward": 0.744419677555561,
      "rewards/format_reward": 0.0,
      "step": 35
    },
    {
      "completion_length": 582.1995803833008,
      "epoch": 0.6823027718550106,
      "grad_norm": 2.5935020446777344,
      "kl": 0.018338775634765624,
      "learning_rate": 8.029152419343472e-07,
      "loss": 0.0428,
      "reward": 0.7609375298023224,
      "reward_std": 0.181213954789564,
      "rewards/accuracy_reward": 0.7609375298023224,
      "rewards/format_reward": 0.0,
      "step": 40
    },
    {
      "completion_length": 602.1290435791016,
      "epoch": 0.767590618336887,
      "grad_norm": 14.890217781066895,
      "kl": 5.168456268310547,
      "learning_rate": 4.3933982822017883e-07,
      "loss": 0.3779,
      "reward": 0.7399553924798965,
      "reward_std": 0.19358455892652274,
      "rewards/accuracy_reward": 0.7399553924798965,
      "rewards/format_reward": 0.0,
      "step": 45
    },
    {
      "completion_length": 598.5703392028809,
      "epoch": 0.8528784648187633,
      "grad_norm": 5.467597007751465,
      "kl": 0.12263717651367187,
      "learning_rate": 1.718159615201853e-07,
      "loss": 0.0443,
      "reward": 0.7386161044239998,
      "reward_std": 0.19092278694733977,
      "rewards/accuracy_reward": 0.7386161044239998,
      "rewards/format_reward": 0.0,
      "step": 50
    },
    {
      "completion_length": 591.5207893371582,
      "epoch": 0.9381663113006397,
      "grad_norm": 74.10267639160156,
      "kl": 0.07649765014648438,
      "learning_rate": 2.4570139579284723e-08,
      "loss": 0.0465,
      "reward": 0.7707589626312256,
      "reward_std": 0.1925399899482727,
      "rewards/accuracy_reward": 0.7707589626312256,
      "rewards/format_reward": 0.0,
      "step": 55
    },
    {
      "completion_length": 587.6321029663086,
      "epoch": 0.9893390191897654,
      "kl": 0.052463531494140625,
      "reward": 0.7678571715950966,
      "reward_std": 0.17418795668830475,
      "rewards/accuracy_reward": 0.7678571715950966,
      "rewards/format_reward": 0.0,
      "step": 58,
      "total_flos": 0.0,
      "train_loss": 0.25991881401117506,
      "train_runtime": 12262.8912,
      "train_samples_per_second": 0.612,
      "train_steps_per_second": 0.005
    }
  ],
  "logging_steps": 5,
  "max_steps": 58,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|