|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9872340425531916, |
|
"eval_steps": 100, |
|
"global_step": 58, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 971.7639999389648, |
|
"epoch": 0.03404255319148936, |
|
"grad_norm": 0.2631084113360146, |
|
"kl": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.1238, |
|
"reward": 0.698086328804493, |
|
"reward_std": 0.13120541395619512, |
|
"rewards/accuracy_reward": 0.745535746216774, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.04744941322132945, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 979.8407287597656, |
|
"epoch": 0.1702127659574468, |
|
"grad_norm": 0.22684407077545118, |
|
"kl": 7.544457912445068e-05, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1155, |
|
"reward": 0.6869497802108526, |
|
"reward_std": 0.16120319138281047, |
|
"rewards/accuracy_reward": 0.734793558716774, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.047843783744610846, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 872.467677307129, |
|
"epoch": 0.3404255319148936, |
|
"grad_norm": 0.4684158119972997, |
|
"kl": 0.00807795524597168, |
|
"learning_rate": 2.956412726139078e-06, |
|
"loss": 0.1146, |
|
"reward": 0.7163276463747025, |
|
"reward_std": 0.16980856116861104, |
|
"rewards/accuracy_reward": 0.7589286044239998, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.04260096037760377, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 751.1273788452148, |
|
"epoch": 0.5106382978723404, |
|
"grad_norm": 1.2177198858526401, |
|
"kl": 0.34739990234375, |
|
"learning_rate": 2.7836719084521715e-06, |
|
"loss": 0.1264, |
|
"reward": 0.6330783508718014, |
|
"reward_std": 0.20811637695878743, |
|
"rewards/accuracy_reward": 0.6697544969618321, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.03667614138685167, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 613.7534912109375, |
|
"epoch": 0.6808510638297872, |
|
"grad_norm": 1.3980210820254566, |
|
"kl": 0.744873046875, |
|
"learning_rate": 2.4946839873611927e-06, |
|
"loss": 0.1578, |
|
"reward": 0.5512815967202187, |
|
"reward_std": 0.23243679329752923, |
|
"rewards/accuracy_reward": 0.581250024586916, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.0299684323836118, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 582.3771484375, |
|
"epoch": 0.851063829787234, |
|
"grad_norm": 1.8890671871166043, |
|
"kl": 0.9321044921875, |
|
"learning_rate": 2.1156192081791355e-06, |
|
"loss": 0.1965, |
|
"reward": 0.56687613427639, |
|
"reward_std": 0.21850477196276188, |
|
"rewards/accuracy_reward": 0.5953125260770321, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.028436384443193675, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 557.1671844482422, |
|
"epoch": 1.0340425531914894, |
|
"grad_norm": 6.969148820152461, |
|
"kl": 0.90537109375, |
|
"learning_rate": 1.6808050203829845e-06, |
|
"loss": 0.1776, |
|
"reward": 0.609768246114254, |
|
"reward_std": 0.20966911502182484, |
|
"rewards/accuracy_reward": 0.6368303805589676, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.027062145154923202, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 529.0748001098633, |
|
"epoch": 1.2042553191489362, |
|
"grad_norm": 1.6903572527831305, |
|
"kl": 0.88388671875, |
|
"learning_rate": 1.2296174432791415e-06, |
|
"loss": 0.1976, |
|
"reward": 0.6035189718008042, |
|
"reward_std": 0.20989050157368183, |
|
"rewards/accuracy_reward": 0.6293527103960515, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.025833730399608613, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 518.8400924682617, |
|
"epoch": 1.374468085106383, |
|
"grad_norm": 1.140228131246192, |
|
"kl": 1.342578125, |
|
"learning_rate": 8.029152419343472e-07, |
|
"loss": 0.2382, |
|
"reward": 0.5803579963743687, |
|
"reward_std": 0.23556350730359554, |
|
"rewards/accuracy_reward": 0.6056919865310192, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.025333988945931196, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 512.1945533752441, |
|
"epoch": 1.5446808510638297, |
|
"grad_norm": 1.4935623924313746, |
|
"kl": 0.83896484375, |
|
"learning_rate": 4.3933982822017883e-07, |
|
"loss": 0.2043, |
|
"reward": 0.5334726713597775, |
|
"reward_std": 0.25458414256572726, |
|
"rewards/accuracy_reward": 0.5584821730852128, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.025009499955922367, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 510.34209899902345, |
|
"epoch": 1.7148936170212767, |
|
"grad_norm": 2.5529208106611776, |
|
"kl": 1.512890625, |
|
"learning_rate": 1.718159615201853e-07, |
|
"loss": 0.2564, |
|
"reward": 0.5088979430496693, |
|
"reward_std": 0.26402820013463496, |
|
"rewards/accuracy_reward": 0.5338169865310192, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.02491904767230153, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 509.2735771179199, |
|
"epoch": 1.8851063829787233, |
|
"grad_norm": 1.2580199216017853, |
|
"kl": 1.3421875, |
|
"learning_rate": 2.4570139579284723e-08, |
|
"loss": 0.2429, |
|
"reward": 0.5227894008159637, |
|
"reward_std": 0.24820317029953004, |
|
"rewards/accuracy_reward": 0.5476562783122063, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.02486687391065061, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 513.0390764872233, |
|
"epoch": 1.9872340425531916, |
|
"kl": 1.3177083333333333, |
|
"reward": 0.5240900913874308, |
|
"reward_std": 0.256128067150712, |
|
"rewards/accuracy_reward": 0.54892115543286, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.024831065131972235, |
|
"step": 58, |
|
"total_flos": 0.0, |
|
"train_loss": 0.1866766851523827, |
|
"train_runtime": 17842.1504, |
|
"train_samples_per_second": 0.841, |
|
"train_steps_per_second": 0.003 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 58, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|