|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9211087420042645, |
|
"eval_steps": 100, |
|
"global_step": 170, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 613.281721496582, |
|
"epoch": 0.08528784648187633, |
|
"grad_norm": 4.2232208251953125, |
|
"kl": 0.00017681121826171876, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0, |
|
"reward": 0.6537946701049805, |
|
"reward_std": 0.3218739811331034, |
|
"rewards/accuracy_reward": 0.6537946701049805, |
|
"rewards/format_reward2": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 618.741323852539, |
|
"epoch": 0.17057569296375266, |
|
"grad_norm": 0.7694841027259827, |
|
"kl": 0.23766450881958007, |
|
"learning_rate": 2.956412726139078e-06, |
|
"loss": 0.0095, |
|
"reward": 0.7125000342726707, |
|
"reward_std": 0.26975345350801944, |
|
"rewards/accuracy_reward": 0.7125000342726707, |
|
"rewards/format_reward2": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 625.0417678833007, |
|
"epoch": 0.255863539445629, |
|
"grad_norm": 0.11407410353422165, |
|
"kl": 0.00294036865234375, |
|
"learning_rate": 2.7836719084521715e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7500000342726707, |
|
"reward_std": 0.2328610809519887, |
|
"rewards/accuracy_reward": 0.7500000342726707, |
|
"rewards/format_reward2": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 604.2779296875, |
|
"epoch": 0.3411513859275053, |
|
"grad_norm": 0.10821383446455002, |
|
"kl": 0.0032527923583984377, |
|
"learning_rate": 2.4946839873611927e-06, |
|
"loss": 0.0001, |
|
"reward": 0.7553571820259094, |
|
"reward_std": 0.21391915678977966, |
|
"rewards/accuracy_reward": 0.7553571820259094, |
|
"rewards/format_reward2": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 611.8196701049804, |
|
"epoch": 0.42643923240938164, |
|
"grad_norm": 0.1587938815355301, |
|
"kl": 0.003980827331542969, |
|
"learning_rate": 2.1156192081791355e-06, |
|
"loss": 0.0002, |
|
"reward": 0.755357176065445, |
|
"reward_std": 0.1927801643498242, |
|
"rewards/accuracy_reward": 0.755357176065445, |
|
"rewards/format_reward2": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 614.662525177002, |
|
"epoch": 0.511727078891258, |
|
"grad_norm": 0.1258433312177658, |
|
"kl": 0.0038448333740234374, |
|
"learning_rate": 1.6808050203829845e-06, |
|
"loss": 0.0002, |
|
"reward": 0.7575893223285675, |
|
"reward_std": 0.18457154743373394, |
|
"rewards/accuracy_reward": 0.7575893223285675, |
|
"rewards/format_reward2": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 599.8576171875, |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 0.088959701359272, |
|
"kl": 0.004636383056640625, |
|
"learning_rate": 1.2296174432791415e-06, |
|
"loss": 0.0002, |
|
"reward": 0.7524553865194321, |
|
"reward_std": 0.1772271953523159, |
|
"rewards/accuracy_reward": 0.7524553865194321, |
|
"rewards/format_reward2": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"completion_length": 587.4435562133789, |
|
"epoch": 0.6823027718550106, |
|
"grad_norm": 0.08021266013383865, |
|
"kl": 0.004290008544921875, |
|
"learning_rate": 8.029152419343472e-07, |
|
"loss": 0.0002, |
|
"reward": 0.7700893208384514, |
|
"reward_std": 0.1737084028776735, |
|
"rewards/accuracy_reward": 0.7700893208384514, |
|
"rewards/format_reward2": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 605.4605155944824, |
|
"epoch": 0.767590618336887, |
|
"grad_norm": 0.0632205456495285, |
|
"kl": 0.003778839111328125, |
|
"learning_rate": 4.3933982822017883e-07, |
|
"loss": 0.0002, |
|
"reward": 0.7537946745753288, |
|
"reward_std": 0.1909334436058998, |
|
"rewards/accuracy_reward": 0.7537946745753288, |
|
"rewards/format_reward2": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"completion_length": 606.9638671875, |
|
"epoch": 0.8528784648187633, |
|
"grad_norm": 0.08492382615804672, |
|
"kl": 0.003963851928710937, |
|
"learning_rate": 1.718159615201853e-07, |
|
"loss": 0.0002, |
|
"reward": 0.7578125342726707, |
|
"reward_std": 0.1673258093651384, |
|
"rewards/accuracy_reward": 0.7578125342726707, |
|
"rewards/format_reward2": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 599.2339523315429, |
|
"epoch": 0.9381663113006397, |
|
"grad_norm": 0.07253226637840271, |
|
"kl": 0.011330032348632812, |
|
"learning_rate": 2.4570139579284723e-08, |
|
"loss": 0.0005, |
|
"reward": 0.7895089671015739, |
|
"reward_std": 0.17561167925596238, |
|
"rewards/accuracy_reward": 0.7895089671015739, |
|
"rewards/format_reward2": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"completion_length": 578.6864166259766, |
|
"epoch": 1.0341151385927505, |
|
"grad_norm": 0.12633396685123444, |
|
"kl": 0.003711700439453125, |
|
"learning_rate": 5.358185854701909e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7918527200818062, |
|
"reward_std": 0.17118105152621865, |
|
"rewards/accuracy_reward": 0.7918527200818062, |
|
"rewards/format_reward4": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"completion_length": 579.6618591308594, |
|
"epoch": 1.1194029850746268, |
|
"grad_norm": 0.0748809352517128, |
|
"kl": 0.00350341796875, |
|
"learning_rate": 3.0996998956314745e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7732143223285675, |
|
"reward_std": 0.1733042700216174, |
|
"rewards/accuracy_reward": 0.7732143223285675, |
|
"rewards/format_reward4": 0.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"completion_length": 564.5884208679199, |
|
"epoch": 1.2046908315565032, |
|
"grad_norm": 0.17819823324680328, |
|
"kl": 0.003855133056640625, |
|
"learning_rate": 1.405383194450251e-07, |
|
"loss": 0.0002, |
|
"reward": 0.8015625357627869, |
|
"reward_std": 0.16864687129855155, |
|
"rewards/accuracy_reward": 0.8015625357627869, |
|
"rewards/format_reward4": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"completion_length": 561.2647575378418, |
|
"epoch": 1.2899786780383795, |
|
"grad_norm": 0.08583667129278183, |
|
"kl": 0.0035480499267578126, |
|
"learning_rate": 3.5555989320099955e-08, |
|
"loss": 0.0001, |
|
"reward": 0.7986607506871224, |
|
"reward_std": 0.15244121365249158, |
|
"rewards/accuracy_reward": 0.7986607506871224, |
|
"rewards/format_reward4": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"completion_length": 572.7236892700196, |
|
"epoch": 1.375266524520256, |
|
"grad_norm": 0.13771697878837585, |
|
"kl": 0.0034299850463867187, |
|
"learning_rate": 0.0, |
|
"loss": 0.0001, |
|
"reward": 0.8002232491970063, |
|
"reward_std": 0.15689483480527996, |
|
"rewards/accuracy_reward": 0.8002232491970063, |
|
"rewards/format_reward4": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"completion_length": 578.9810539245606, |
|
"epoch": 1.4605543710021323, |
|
"grad_norm": 2.014723777770996, |
|
"kl": 0.010892486572265625, |
|
"learning_rate": 2.0096189432334195e-07, |
|
"loss": 0.0004, |
|
"reward": 0.7611607536673546, |
|
"reward_std": 0.18238836526870728, |
|
"rewards/accuracy_reward": 0.7611607536673546, |
|
"rewards/format_reward4": 0.0, |
|
"step": 85 |
|
}, |
|
{ |
|
"completion_length": 589.071681213379, |
|
"epoch": 1.5458422174840085, |
|
"grad_norm": 0.07733023166656494, |
|
"kl": 0.00322265625, |
|
"learning_rate": 9.046106882113752e-08, |
|
"loss": 0.0001, |
|
"reward": 0.7718750327825546, |
|
"reward_std": 0.16937124980613588, |
|
"rewards/accuracy_reward": 0.7718750327825546, |
|
"rewards/format_reward4": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"completion_length": 584.4857421875, |
|
"epoch": 1.6311300639658848, |
|
"grad_norm": 0.08772465586662292, |
|
"kl": 0.003212738037109375, |
|
"learning_rate": 2.278837048168797e-08, |
|
"loss": 0.0001, |
|
"reward": 0.765625037252903, |
|
"reward_std": 0.17046672012656927, |
|
"rewards/accuracy_reward": 0.765625037252903, |
|
"rewards/format_reward4": 0.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"completion_length": 584.8821662902832, |
|
"epoch": 1.716417910447761, |
|
"grad_norm": 0.09350935369729996, |
|
"kl": 0.0032810211181640626, |
|
"learning_rate": 0.0, |
|
"loss": 0.0001, |
|
"reward": 0.7743303939700127, |
|
"reward_std": 0.17159467502497136, |
|
"rewards/accuracy_reward": 0.7743303939700127, |
|
"rewards/format_reward4": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.716417910447761, |
|
"eval_completion_length": 572.5716213433507, |
|
"eval_kl": 0.004271193434255192, |
|
"eval_loss": 0.00017078800010494888, |
|
"eval_reward": 0.6897820954124767, |
|
"eval_reward_std": 0.20645368708589207, |
|
"eval_rewards/accuracy_reward": 0.6897820954124767, |
|
"eval_rewards/format_reward4": 0.0, |
|
"eval_runtime": 10542.252, |
|
"eval_samples_per_second": 0.474, |
|
"eval_steps_per_second": 0.004, |
|
"step": 100 |
|
}, |
|
{ |
|
"completion_length": 593.587084197998, |
|
"epoch": 1.8017057569296375, |
|
"grad_norm": 0.07390806823968887, |
|
"kl": 0.00316009521484375, |
|
"learning_rate": 3.2546120637356677e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7497768238186836, |
|
"reward_std": 0.18519118977710605, |
|
"rewards/accuracy_reward": 0.7497768238186836, |
|
"rewards/format_reward4": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"completion_length": 589.4265899658203, |
|
"epoch": 1.886993603411514, |
|
"grad_norm": 0.07380172610282898, |
|
"kl": 0.0030284881591796874, |
|
"learning_rate": 2.1114787115667477e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7691964611411095, |
|
"reward_std": 0.1665174851194024, |
|
"rewards/accuracy_reward": 0.7691964611411095, |
|
"rewards/format_reward4": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"completion_length": 581.2207862854004, |
|
"epoch": 1.9722814498933903, |
|
"grad_norm": 0.06949003785848618, |
|
"kl": 0.003309822082519531, |
|
"learning_rate": 1.2003083451176365e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7843750312924385, |
|
"reward_std": 0.17008549151942134, |
|
"rewards/accuracy_reward": 0.7843750312924385, |
|
"rewards/format_reward4": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"completion_length": 576.8970560709636, |
|
"epoch": 2.068230277185501, |
|
"grad_norm": 0.0717669427394867, |
|
"kl": 0.0029771592881944445, |
|
"learning_rate": 5.374998819965654e-08, |
|
"loss": 0.0001, |
|
"reward": 0.7791667024294535, |
|
"reward_std": 0.1727727702094449, |
|
"rewards/accuracy_reward": 0.7791667024294535, |
|
"rewards/format_reward4": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"completion_length": 552.3944480895996, |
|
"epoch": 2.1535181236673773, |
|
"grad_norm": 3.9083805084228516, |
|
"kl": 0.00341339111328125, |
|
"learning_rate": 1.3498231131137295e-08, |
|
"loss": 0.0001, |
|
"reward": 0.7910714641213417, |
|
"reward_std": 0.15105125531554223, |
|
"rewards/accuracy_reward": 0.7910714641213417, |
|
"rewards/format_reward4": 0.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"completion_length": 586.4814987182617, |
|
"epoch": 2.2388059701492535, |
|
"grad_norm": 0.08841477334499359, |
|
"kl": 0.0033771514892578123, |
|
"learning_rate": 0.0, |
|
"loss": 0.0001, |
|
"reward": 0.7645089626312256, |
|
"reward_std": 0.16334964451380074, |
|
"rewards/accuracy_reward": 0.7645089626312256, |
|
"rewards/format_reward4": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"completion_length": 562.0841804504395, |
|
"epoch": 2.3240938166311302, |
|
"grad_norm": 0.0749669224023819, |
|
"kl": 0.003040122985839844, |
|
"learning_rate": 3.709719800782133e-07, |
|
"loss": 0.0001, |
|
"reward": 0.8017857521772385, |
|
"reward_std": 0.1651908096857369, |
|
"rewards/accuracy_reward": 0.8017857521772385, |
|
"rewards/format_reward4": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"completion_length": 568.1428817749023, |
|
"epoch": 2.4093816631130065, |
|
"grad_norm": 0.06772708147764206, |
|
"kl": 0.0032039642333984374, |
|
"learning_rate": 2.757046314656676e-07, |
|
"loss": 0.0001, |
|
"reward": 0.799776816368103, |
|
"reward_std": 0.15835139667615294, |
|
"rewards/accuracy_reward": 0.799776816368103, |
|
"rewards/format_reward4": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"completion_length": 576.2410957336426, |
|
"epoch": 2.4946695095948828, |
|
"grad_norm": 0.07914450764656067, |
|
"kl": 0.0030412673950195312, |
|
"learning_rate": 1.9333050887001336e-07, |
|
"loss": 0.0001, |
|
"reward": 0.772544676065445, |
|
"reward_std": 0.1686524854041636, |
|
"rewards/accuracy_reward": 0.772544676065445, |
|
"rewards/format_reward4": 0.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"completion_length": 580.0102951049805, |
|
"epoch": 2.579957356076759, |
|
"grad_norm": 0.09259422868490219, |
|
"kl": 0.0027721405029296877, |
|
"learning_rate": 1.2471710571470578e-07, |
|
"loss": 0.0001, |
|
"reward": 0.7595982506871224, |
|
"reward_std": 0.15487177977338434, |
|
"rewards/accuracy_reward": 0.7595982506871224, |
|
"rewards/format_reward4": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"completion_length": 568.7931098937988, |
|
"epoch": 2.6652452025586353, |
|
"grad_norm": 0.33623555302619934, |
|
"kl": 0.003169822692871094, |
|
"learning_rate": 7.058699935926527e-08, |
|
"loss": 0.0001, |
|
"reward": 0.7776786103844643, |
|
"reward_std": 0.1605815477669239, |
|
"rewards/accuracy_reward": 0.7776786103844643, |
|
"rewards/format_reward4": 0.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"completion_length": 597.3964576721191, |
|
"epoch": 2.750533049040512, |
|
"grad_norm": 0.07021904736757278, |
|
"kl": 0.0028789520263671877, |
|
"learning_rate": 3.151024153589321e-08, |
|
"loss": 0.0001, |
|
"reward": 0.7546875327825546, |
|
"reward_std": 0.18458664841018618, |
|
"rewards/accuracy_reward": 0.7546875327825546, |
|
"rewards/format_reward4": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"completion_length": 582.8522552490234, |
|
"epoch": 2.835820895522388, |
|
"grad_norm": 0.10628537088632584, |
|
"kl": 0.0029039382934570312, |
|
"learning_rate": 7.898355054830719e-09, |
|
"loss": 0.0001, |
|
"reward": 0.7602678939700127, |
|
"reward_std": 0.17046223413199185, |
|
"rewards/accuracy_reward": 0.7602678939700127, |
|
"rewards/format_reward4": 0.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"completion_length": 577.0710075378418, |
|
"epoch": 2.9211087420042645, |
|
"grad_norm": 0.09094743430614471, |
|
"kl": 0.0032497406005859374, |
|
"learning_rate": 0.0, |
|
"loss": 0.0001, |
|
"reward": 0.779241107404232, |
|
"reward_std": 0.16634787572547793, |
|
"rewards/accuracy_reward": 0.779241107404232, |
|
"rewards/format_reward4": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.9211087420042645, |
|
"step": 170, |
|
"total_flos": 0.0, |
|
"train_loss": 2.8551620540811734e-05, |
|
"train_runtime": 11823.9785, |
|
"train_samples_per_second": 12.882, |
|
"train_steps_per_second": 0.014 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 170, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|