|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9893390191897654, |
|
"eval_steps": 100, |
|
"global_step": 58, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 607.8761520385742, |
|
"epoch": 0.017057569296375266, |
|
"grad_norm": 0.21163204312324524, |
|
"kl": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0347, |
|
"reward": 0.7064732536673546, |
|
"reward_std": 0.2907280754297972, |
|
"rewards/accuracy_reward": 0.6964286044239998, |
|
"rewards/format_reward": 0.010044643422588706, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 598.3354082107544, |
|
"epoch": 0.08528784648187633, |
|
"grad_norm": 0.22014343738555908, |
|
"kl": 0.00016763806343078613, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0443, |
|
"reward": 0.6668527107685804, |
|
"reward_std": 0.30428835609927773, |
|
"rewards/accuracy_reward": 0.6609933376312256, |
|
"rewards/format_reward": 0.005859375291038305, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 602.3207862854003, |
|
"epoch": 0.17057569296375266, |
|
"grad_norm": 0.1867409348487854, |
|
"kl": 0.006086993217468262, |
|
"learning_rate": 2.956412726139078e-06, |
|
"loss": 0.0561, |
|
"reward": 0.6991071760654449, |
|
"reward_std": 0.28411166220903394, |
|
"rewards/accuracy_reward": 0.6957589611411095, |
|
"rewards/format_reward": 0.0033482144586741923, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 591.975025177002, |
|
"epoch": 0.255863539445629, |
|
"grad_norm": 0.272446870803833, |
|
"kl": 0.006195259094238281, |
|
"learning_rate": 2.7836719084521715e-06, |
|
"loss": 0.0687, |
|
"reward": 0.7625000327825546, |
|
"reward_std": 0.21445324290543794, |
|
"rewards/accuracy_reward": 0.7611607477068901, |
|
"rewards/format_reward": 0.0013392857741564511, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 580.5134178161621, |
|
"epoch": 0.3411513859275053, |
|
"grad_norm": 0.4638945162296295, |
|
"kl": 0.0028181076049804688, |
|
"learning_rate": 2.4946839873611927e-06, |
|
"loss": 0.0489, |
|
"reward": 0.7725446745753288, |
|
"reward_std": 0.1972122782841325, |
|
"rewards/accuracy_reward": 0.7714286059141159, |
|
"rewards/format_reward": 0.0011160714784637094, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 586.8551612854004, |
|
"epoch": 0.42643923240938164, |
|
"grad_norm": 0.13729101419448853, |
|
"kl": 0.0033502578735351562, |
|
"learning_rate": 2.1156192081791355e-06, |
|
"loss": 0.0425, |
|
"reward": 0.7703125372529029, |
|
"reward_std": 0.19164941012859343, |
|
"rewards/accuracy_reward": 0.769866107404232, |
|
"rewards/format_reward": 0.00044642859138548373, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 589.1917663574219, |
|
"epoch": 0.511727078891258, |
|
"grad_norm": 0.10644034296274185, |
|
"kl": 0.004234695434570312, |
|
"learning_rate": 1.6808050203829845e-06, |
|
"loss": 0.034, |
|
"reward": 0.7531250387430191, |
|
"reward_std": 0.18948373831808568, |
|
"rewards/accuracy_reward": 0.7511161044239998, |
|
"rewards/format_reward": 0.002008928661234677, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 578.0815017700195, |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 0.09403481334447861, |
|
"kl": 0.004046249389648438, |
|
"learning_rate": 1.2296174432791415e-06, |
|
"loss": 0.0411, |
|
"reward": 0.7562500327825546, |
|
"reward_std": 0.18806953858584166, |
|
"rewards/accuracy_reward": 0.7495536088943482, |
|
"rewards/format_reward": 0.006696428894065321, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 564.9935516357422, |
|
"epoch": 0.6823027718550106, |
|
"grad_norm": 0.22079423069953918, |
|
"kl": 0.00465545654296875, |
|
"learning_rate": 8.029152419343472e-07, |
|
"loss": 0.0326, |
|
"reward": 0.7790178969502449, |
|
"reward_std": 0.18834841772913932, |
|
"rewards/accuracy_reward": 0.7680803954601287, |
|
"rewards/format_reward": 0.010937500628642738, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 582.787744140625, |
|
"epoch": 0.767590618336887, |
|
"grad_norm": 0.16064484417438507, |
|
"kl": 0.005054092407226563, |
|
"learning_rate": 4.3933982822017883e-07, |
|
"loss": 0.0423, |
|
"reward": 0.7665178939700127, |
|
"reward_std": 0.22603920232504607, |
|
"rewards/accuracy_reward": 0.7470982491970062, |
|
"rewards/format_reward": 0.019419643795117736, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 580.5013664245605, |
|
"epoch": 0.8528784648187633, |
|
"grad_norm": 0.18539777398109436, |
|
"kl": 0.00699310302734375, |
|
"learning_rate": 1.718159615201853e-07, |
|
"loss": 0.0301, |
|
"reward": 0.7915178954601287, |
|
"reward_std": 0.2188779940828681, |
|
"rewards/accuracy_reward": 0.755357176065445, |
|
"rewards/format_reward": 0.036160716065205635, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 580.8031463623047, |
|
"epoch": 0.9381663113006397, |
|
"grad_norm": 0.2504195272922516, |
|
"kl": 0.009212875366210937, |
|
"learning_rate": 2.4570139579284723e-08, |
|
"loss": 0.038, |
|
"reward": 0.8113839596509933, |
|
"reward_std": 0.2401964722201228, |
|
"rewards/accuracy_reward": 0.7790178954601288, |
|
"rewards/format_reward": 0.03236607303842902, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 567.5023994445801, |
|
"epoch": 0.9893390191897654, |
|
"kl": 0.011606852213541666, |
|
"reward": 0.8020833730697632, |
|
"reward_std": 0.23279494047164917, |
|
"rewards/accuracy_reward": 0.7678571765621504, |
|
"rewards/format_reward": 0.03422619208383063, |
|
"step": 58, |
|
"total_flos": 0.0, |
|
"train_loss": 0.04428564981910689, |
|
"train_runtime": 7921.619, |
|
"train_samples_per_second": 0.947, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 58, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|