|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9872340425531916, |
|
"eval_steps": 100, |
|
"global_step": 58, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 971.7639999389648, |
|
"epoch": 0.03404255319148936, |
|
"grad_norm": 0.28463842001337397, |
|
"kl": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.134, |
|
"reward": 0.5082886815071106, |
|
"reward_std": 0.15963527746498585, |
|
"rewards/accuracy_reward": 0.745535746216774, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.23724707029759884, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 974.6809825897217, |
|
"epoch": 0.1702127659574468, |
|
"grad_norm": 0.23728338609141092, |
|
"kl": 7.671117782592773e-05, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.132, |
|
"reward": 0.4996245112270117, |
|
"reward_std": 0.1835802630521357, |
|
"rewards/accuracy_reward": 0.737583739683032, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.23795922426506877, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 867.940104675293, |
|
"epoch": 0.3404255319148936, |
|
"grad_norm": 0.6283062851573521, |
|
"kl": 0.008861899375915527, |
|
"learning_rate": 2.956412726139078e-06, |
|
"loss": 0.1317, |
|
"reward": 0.5490380935370922, |
|
"reward_std": 0.1904804239049554, |
|
"rewards/accuracy_reward": 0.7609375298023224, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.21189943961799146, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 730.2546096801758, |
|
"epoch": 0.5106382978723404, |
|
"grad_norm": 1.310897088360317, |
|
"kl": 0.26617431640625, |
|
"learning_rate": 2.7836719084521715e-06, |
|
"loss": 0.1354, |
|
"reward": 0.47696074694395063, |
|
"reward_std": 0.23283899687230586, |
|
"rewards/accuracy_reward": 0.6552455656230449, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.17828481681644917, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 575.1072868347168, |
|
"epoch": 0.6808510638297872, |
|
"grad_norm": 2.0875900059869346, |
|
"kl": 0.91943359375, |
|
"learning_rate": 2.4946839873611927e-06, |
|
"loss": 0.178, |
|
"reward": 0.44084297828376295, |
|
"reward_std": 0.2517234329134226, |
|
"rewards/accuracy_reward": 0.5812500230967999, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.14040705244988202, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 543.0458961486817, |
|
"epoch": 0.851063829787234, |
|
"grad_norm": 8.217171705965015, |
|
"kl": 1.1748046875, |
|
"learning_rate": 2.1156192081791355e-06, |
|
"loss": 0.2204, |
|
"reward": 0.4586034990847111, |
|
"reward_std": 0.23402524627745153, |
|
"rewards/accuracy_reward": 0.5911830633878707, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.13257956448942423, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 521.1100387573242, |
|
"epoch": 1.0340425531914894, |
|
"grad_norm": 6.762082627347706, |
|
"kl": 0.7322998046875, |
|
"learning_rate": 1.6808050203829845e-06, |
|
"loss": 0.1845, |
|
"reward": 0.5005072712898254, |
|
"reward_std": 0.23026448152959347, |
|
"rewards/accuracy_reward": 0.627120566368103, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.1266132965683937, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 494.15995559692385, |
|
"epoch": 1.2042553191489362, |
|
"grad_norm": 3.1612480284068982, |
|
"kl": 1.55126953125, |
|
"learning_rate": 1.2296174432791415e-06, |
|
"loss": 0.2725, |
|
"reward": 0.48895372450351715, |
|
"reward_std": 0.24462027363479139, |
|
"rewards/accuracy_reward": 0.6095982402563095, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.12064452040940524, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 483.87390594482423, |
|
"epoch": 1.374468085106383, |
|
"grad_norm": 4.294437421320427, |
|
"kl": 1.28154296875, |
|
"learning_rate": 8.029152419343472e-07, |
|
"loss": 0.2563, |
|
"reward": 0.47126407250761987, |
|
"reward_std": 0.2647111125290394, |
|
"rewards/accuracy_reward": 0.5893973462283612, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.1181332778185606, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 481.56776733398436, |
|
"epoch": 1.5446808510638297, |
|
"grad_norm": 4.491684991262943, |
|
"kl": 1.42666015625, |
|
"learning_rate": 4.3933982822017883e-07, |
|
"loss": 0.2752, |
|
"reward": 0.4363360181450844, |
|
"reward_std": 0.2855977095663548, |
|
"rewards/accuracy_reward": 0.5539062716066837, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.11757025569677353, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 484.0088371276855, |
|
"epoch": 1.7148936170212767, |
|
"grad_norm": 2.1484729806652747, |
|
"kl": 1.5796875, |
|
"learning_rate": 1.718159615201853e-07, |
|
"loss": 0.2947, |
|
"reward": 0.4461195237934589, |
|
"reward_std": 0.28383450768887997, |
|
"rewards/accuracy_reward": 0.564285734295845, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.11816622000187635, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 489.8710029602051, |
|
"epoch": 1.8851063829787233, |
|
"grad_norm": 1.8313195035052112, |
|
"kl": 1.49287109375, |
|
"learning_rate": 2.4570139579284723e-08, |
|
"loss": 0.2968, |
|
"reward": 0.44390707574784755, |
|
"reward_std": 0.281308988109231, |
|
"rewards/accuracy_reward": 0.5635044880211353, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.11959741283208132, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 490.9578692118327, |
|
"epoch": 1.9872340425531916, |
|
"kl": 1.4850260416666667, |
|
"reward": 0.4616190791130066, |
|
"reward_std": 0.27286908278862637, |
|
"rewards/accuracy_reward": 0.5811012176175913, |
|
"rewards/format_reward": 0.0, |
|
"rewards/len_penalty": -0.1194821372628212, |
|
"step": 58, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2185068104801507, |
|
"train_runtime": 17682.4744, |
|
"train_samples_per_second": 0.848, |
|
"train_steps_per_second": 0.003 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 58, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|