{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9893390191897654,
  "eval_steps": 100,
  "global_step": 174,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio": 0.0,
      "completion_length": 606.0714492797852,
      "epoch": 0.005685856432125089,
      "grad_norm": 0.4518553614616394,
      "kl": 0.0,
      "learning_rate": 1.6666666666666665e-07,
      "loss": 0.0324,
      "reward": 0.6272321715950966,
      "reward_std": 0.3481696490198374,
      "rewards/accuracy_reward": 0.6272321715950966,
      "rewards/format_reward": 0.0,
      "step": 1
    },
    {
      "clip_ratio": 0.00032178488891076995,
      "completion_length": 595.1618576049805,
      "epoch": 0.028429282160625444,
      "grad_norm": 0.3228553831577301,
      "kl": 0.00016799569129943848,
      "learning_rate": 8.333333333333334e-07,
      "loss": 0.0257,
      "reward": 0.616071455180645,
      "reward_std": 0.3413039892911911,
      "rewards/accuracy_reward": 0.616071455180645,
      "rewards/format_reward": 0.0,
      "step": 5
    },
    {
      "clip_ratio": 0.0005511968294740655,
      "completion_length": 622.5971336364746,
      "epoch": 0.05685856432125089,
      "grad_norm": 0.5083388686180115,
      "kl": 0.0019717693328857424,
      "learning_rate": 1.6666666666666669e-06,
      "loss": 0.0222,
      "reward": 0.6210937760770321,
      "reward_std": 0.34381177742034197,
      "rewards/accuracy_reward": 0.6210937760770321,
      "rewards/format_reward": 0.0,
      "step": 10
    },
    {
      "clip_ratio": 0.0008204746780393179,
      "completion_length": 605.372802734375,
      "epoch": 0.08528784648187633,
      "grad_norm": 3.3226027488708496,
      "kl": 0.05862216949462891,
      "learning_rate": 2.5e-06,
      "loss": 0.0525,
      "reward": 0.678571455180645,
      "reward_std": 0.267782362177968,
      "rewards/accuracy_reward": 0.678571455180645,
      "rewards/format_reward": 0.0,
      "step": 15
    },
    {
      "clip_ratio": 0.0004576380066282582,
      "completion_length": 613.8236846923828,
      "epoch": 0.11371712864250177,
      "grad_norm": 0.32018226385116577,
      "kl": 0.01548614501953125,
      "learning_rate": 2.9987989426927397e-06,
      "loss": 0.0725,
      "reward": 0.7137277089059353,
      "reward_std": 0.24586139619350433,
      "rewards/accuracy_reward": 0.7137277089059353,
      "rewards/format_reward": 0.0,
      "step": 20
    },
    {
      "clip_ratio": 0.00030219302170735317,
      "completion_length": 607.8783798217773,
      "epoch": 0.14214641080312723,
      "grad_norm": 0.25089940428733826,
      "kl": 0.004609870910644531,
      "learning_rate": 2.9853091271324575e-06,
      "loss": 0.0621,
      "reward": 0.7572545073926449,
      "reward_std": 0.22519285418093204,
      "rewards/accuracy_reward": 0.7572545073926449,
      "rewards/format_reward": 0.0,
      "step": 25
    },
    {
      "clip_ratio": 0.0003304076311906101,
      "completion_length": 593.8370819091797,
      "epoch": 0.17057569296375266,
      "grad_norm": 0.0912187322974205,
      "kl": 0.004130172729492188,
      "learning_rate": 2.9569635476423816e-06,
      "loss": 0.0519,
      "reward": 0.7421875298023224,
      "reward_std": 0.22574844770133495,
      "rewards/accuracy_reward": 0.7421875298023224,
      "rewards/format_reward": 0.0,
      "step": 30
    },
    {
      "clip_ratio": 0.00020269225624360842,
      "completion_length": 576.3767032623291,
      "epoch": 0.19900497512437812,
      "grad_norm": 0.2136966437101364,
      "kl": 0.006443023681640625,
      "learning_rate": 2.9140457110223196e-06,
      "loss": 0.0436,
      "reward": 0.7890625335276127,
      "reward_std": 0.18770856643095613,
      "rewards/accuracy_reward": 0.7890625335276127,
      "rewards/format_reward": 0.0,
      "step": 35
    },
    {
      "clip_ratio": 0.00019782203207796556,
      "completion_length": 575.7327270507812,
      "epoch": 0.22743425728500355,
      "grad_norm": 0.20037101209163666,
      "kl": 0.005139541625976562,
      "learning_rate": 2.8569848728646314e-06,
      "loss": 0.0472,
      "reward": 0.7767857424914837,
      "reward_std": 0.17583139101043344,
      "rewards/accuracy_reward": 0.7767857424914837,
      "rewards/format_reward": 0.0,
      "step": 40
    },
    {
      "clip_ratio": 0.00027204428533877945,
      "completion_length": 569.5413284301758,
      "epoch": 0.255863539445629,
      "grad_norm": 0.14864350855350494,
      "kl": 0.00561370849609375,
      "learning_rate": 2.786351744225906e-06,
      "loss": 0.028,
      "reward": 0.7868304029107094,
      "reward_std": 0.1821051500737667,
      "rewards/accuracy_reward": 0.7868304029107094,
      "rewards/format_reward": 0.0,
      "step": 45
    },
    {
      "clip_ratio": 0.00019200436654500663,
      "completion_length": 561.1612930297852,
      "epoch": 0.28429282160625446,
      "grad_norm": 0.18077002465724945,
      "kl": 0.0052585601806640625,
      "learning_rate": 2.70285278348946e-06,
      "loss": 0.0254,
      "reward": 0.8041295073926449,
      "reward_std": 0.151426344178617,
      "rewards/accuracy_reward": 0.8041295073926449,
      "rewards/format_reward": 0.0,
      "step": 50
    },
    {
      "clip_ratio": 0.0002560705053838319,
      "completion_length": 578.782392501831,
      "epoch": 0.31272210376687987,
      "grad_norm": 0.13589029014110565,
      "kl": 0.037276077270507815,
      "learning_rate": 2.607323130510307e-06,
      "loss": 0.0773,
      "reward": 0.7767857499420643,
      "reward_std": 0.17133072740398347,
      "rewards/accuracy_reward": 0.7767857499420643,
      "rewards/format_reward": 0.0,
      "step": 55
    },
    {
      "clip_ratio": 0.00025125542724708795,
      "completion_length": 534.5167694091797,
      "epoch": 0.3411513859275053,
      "grad_norm": 0.13941466808319092,
      "kl": 0.006060791015625,
      "learning_rate": 2.5007182537138604e-06,
      "loss": 0.0176,
      "reward": 0.8069196790456772,
      "reward_std": 0.14778494741767645,
      "rewards/accuracy_reward": 0.8069196790456772,
      "rewards/format_reward": 0.0,
      "step": 60
    },
    {
      "clip_ratio": 0.00017752129824657458,
      "completion_length": 572.3789253234863,
      "epoch": 0.3695806680881308,
      "grad_norm": 0.12366141378879547,
      "kl": 0.008144378662109375,
      "learning_rate": 2.3841043936924138e-06,
      "loss": 0.0264,
      "reward": 0.7974330671131611,
      "reward_std": 0.16408017370849848,
      "rewards/accuracy_reward": 0.7974330671131611,
      "rewards/format_reward": 0.0,
      "step": 65
    },
    {
      "clip_ratio": 0.0002240494894067524,
      "completion_length": 573.7991371154785,
      "epoch": 0.39800995024875624,
      "grad_norm": 0.3389054536819458,
      "kl": 0.0106170654296875,
      "learning_rate": 2.2586478988806294e-06,
      "loss": 0.0254,
      "reward": 0.7712053991854191,
      "reward_std": 0.1784203890711069,
      "rewards/accuracy_reward": 0.7712053991854191,
      "rewards/format_reward": 0.0,
      "step": 70
    },
    {
      "clip_ratio": 0.00038792909363110083,
      "completion_length": 561.3504638671875,
      "epoch": 0.42643923240938164,
      "grad_norm": 408230.84375,
      "kl": 369.6084808349609,
      "learning_rate": 2.1256035599724704e-06,
      "loss": 20.6357,
      "reward": 0.733258955180645,
      "reward_std": 0.1863593803718686,
      "rewards/accuracy_reward": 0.733258955180645,
      "rewards/format_reward": 0.0,
      "step": 75
    },
    {
      "clip_ratio": 0.0003454070749285165,
      "completion_length": 570.4168720245361,
      "epoch": 0.4548685145700071,
      "grad_norm": 0.29222372174263,
      "kl": 0.011145782470703126,
      "learning_rate": 1.986302059756393e-06,
      "loss": 0.0441,
      "reward": 0.7550223544239998,
      "reward_std": 0.19908471265807748,
      "rewards/accuracy_reward": 0.7550223544239998,
      "rewards/format_reward": 0.0,
      "step": 80
    },
    {
      "clip_ratio": 0.0004956771896104329,
      "completion_length": 582.5736961364746,
      "epoch": 0.48329779673063256,
      "grad_norm": 0.44193872809410095,
      "kl": 0.020770263671875,
      "learning_rate": 1.8421366638930446e-06,
      "loss": 0.0448,
      "reward": 0.7533482499420643,
      "reward_std": 0.21024669613689184,
      "rewards/accuracy_reward": 0.7533482499420643,
      "rewards/format_reward": 0.0,
      "step": 85
    },
    {
      "clip_ratio": 0.0008349075542355422,
      "completion_length": 619.9062728881836,
      "epoch": 0.511727078891258,
      "grad_norm": 0.3540225625038147,
      "kl": 0.032684326171875,
      "learning_rate": 1.6945492857516126e-06,
      "loss": 0.7814,
      "reward": 0.6886161044239998,
      "reward_std": 0.24419322237372398,
      "rewards/accuracy_reward": 0.6886161044239998,
      "rewards/format_reward": 0.0,
      "step": 90
    },
    {
      "clip_ratio": 0.0012230878186528572,
      "completion_length": 597.3794937133789,
      "epoch": 0.5401563610518835,
      "grad_norm": 0.8603457808494568,
      "kl": 12.069256591796876,
      "learning_rate": 1.545016064681567e-06,
      "loss": 0.4244,
      "reward": 0.6863839533179998,
      "reward_std": 0.27123062824830413,
      "rewards/accuracy_reward": 0.6863839533179998,
      "rewards/format_reward": 0.0,
      "step": 95
    },
    {
      "epoch": 0.5685856432125089,
      "grad_norm": 3.7414331436157227,
      "learning_rate": 1.3950326019630044e-06,
      "loss": 0.1093,
      "step": 100
    },
    {
      "epoch": 0.5685856432125089,
      "eval_clip_ratio": 0.0,
      "eval_completion_length": 597.2208096927728,
      "eval_kl": 0.3533604046026358,
      "eval_loss": 0.09554016590118408,
      "eval_reward": 0.6108797613424234,
      "eval_reward_std": 0.2930047449926599,
      "eval_rewards/accuracy_reward": 0.6108797613424234,
      "eval_rewards/format_reward": 0.0,
      "eval_runtime": 4704.6499,
      "eval_samples_per_second": 1.063,
      "eval_steps_per_second": 0.01,
      "step": 100
    },
    {
      "clip_ratio": 0.0036425826241611504,
      "completion_length": 612.2809066772461,
      "epoch": 0.5970149253731343,
      "grad_norm": 3.4328973293304443,
      "kl": 0.23067626953125,
      "learning_rate": 1.246099002102669e-06,
      "loss": 0.0648,
      "reward": 0.6726190770665804,
      "reward_std": 0.2866488757232825,
      "rewards/accuracy_reward": 0.6726190770665804,
      "rewards/format_reward": 0.0,
      "step": 105
    },
    {
      "clip_ratio": 0.002448439208092168,
      "completion_length": 575.006721496582,
      "epoch": 0.6254442075337597,
      "grad_norm": 6.977348327636719,
      "kl": 0.39283447265625,
      "learning_rate": 1.0997048690896047e-06,
      "loss": 0.1102,
      "reward": 0.7215402089059353,
      "reward_std": 0.2666306449100375,
      "rewards/accuracy_reward": 0.7215402089059353,
      "rewards/format_reward": 0.0,
      "step": 110
    },
    {
      "clip_ratio": 0.0015252021345077082,
      "completion_length": 571.7087326049805,
      "epoch": 0.6538734896943852,
      "grad_norm": 18.035612106323242,
      "kl": 0.4377197265625,
      "learning_rate": 9.57314407674877e-07,
      "loss": 0.1127,
      "reward": 0.726004496216774,
      "reward_std": 0.2556505873799324,
      "rewards/accuracy_reward": 0.726004496216774,
      "rewards/format_reward": 0.0,
      "step": 115
    },
    {
      "clip_ratio": 0.0011677921371301635,
      "completion_length": 625.1451110839844,
      "epoch": 0.6823027718550106,
      "grad_norm": 11.473644256591797,
      "kl": 0.5058837890625,
      "learning_rate": 8.203517786893802e-07,
      "loss": 0.1134,
      "reward": 0.6819196790456772,
      "reward_std": 0.2964627929031849,
      "rewards/accuracy_reward": 0.6819196790456772,
      "rewards/format_reward": 0.0,
      "step": 120
    },
    {
      "clip_ratio": 0.0015482078888453543,
      "completion_length": 606.9152030944824,
      "epoch": 0.7107320540156361,
      "grad_norm": 15.141037940979004,
      "kl": 0.71937255859375,
      "learning_rate": 6.901868548728988e-07,
      "loss": 0.1873,
      "reward": 0.7176339589059353,
      "reward_std": 0.26815181598067284,
      "rewards/accuracy_reward": 0.7176339589059353,
      "rewards/format_reward": 0.0,
      "step": 125
    },
    {
      "clip_ratio": 0.0011899313576577697,
      "completion_length": 614.9598484039307,
      "epoch": 0.7391613361762616,
      "grad_norm": 19.52332878112793,
      "kl": 0.57347412109375,
      "learning_rate": 5.681215196817688e-07,
      "loss": 0.1139,
      "reward": 0.6986607536673546,
      "reward_std": 0.26849195174872875,
      "rewards/accuracy_reward": 0.6986607536673546,
      "rewards/format_reward": 0.0,
      "step": 130
    },
    {
      "clip_ratio": 0.000972740968427388,
      "completion_length": 574.955379486084,
      "epoch": 0.767590618336887,
      "grad_norm": 84.11812591552734,
      "kl": 6.191357421875,
      "learning_rate": 4.553766461117076e-07,
      "loss": 0.5015,
      "reward": 0.6919643133878708,
      "reward_std": 0.27763173170387745,
      "rewards/accuracy_reward": 0.6919643133878708,
      "rewards/format_reward": 0.0,
      "step": 135
    },
    {
      "clip_ratio": 0.0004989029925127397,
      "completion_length": 600.3588371276855,
      "epoch": 0.7960199004975125,
      "grad_norm": 7.396132469177246,
      "kl": 0.93154296875,
      "learning_rate": 3.5307988577102803e-07,
      "loss": 0.1247,
      "reward": 0.6891741380095482,
      "reward_std": 0.26927561219781637,
      "rewards/accuracy_reward": 0.6891741380095482,
      "rewards/format_reward": 0.0,
      "step": 140
    },
    {
      "clip_ratio": 0.0012033914652420207,
      "completion_length": 599.4185523986816,
      "epoch": 0.8244491826581379,
      "grad_norm": 229.0263214111328,
      "kl": 1.950244140625,
      "learning_rate": 2.6225439033546046e-07,
      "loss": 0.1983,
      "reward": 0.7025670036673546,
      "reward_std": 0.2759845983237028,
      "rewards/accuracy_reward": 0.7025670036673546,
      "rewards/format_reward": 0.0,
      "step": 145
    },
    {
      "clip_ratio": 0.002918653353117406,
      "completion_length": 614.3248138427734,
      "epoch": 0.8528784648187633,
      "grad_norm": 32.9469108581543,
      "kl": 1.07236328125,
      "learning_rate": 1.838085781903433e-07,
      "loss": 0.1503,
      "reward": 0.6852678880095482,
      "reward_std": 0.2656104937195778,
      "rewards/accuracy_reward": 0.6852678880095482,
      "rewards/format_reward": 0.0,
      "step": 150
    },
    {
      "clip_ratio": 0.0016865622681507375,
      "completion_length": 595.2756938934326,
      "epoch": 0.8813077469793887,
      "grad_norm": 29.517351150512695,
      "kl": 2.5845703125,
      "learning_rate": 1.1852704861216351e-07,
      "loss": 0.2295,
      "reward": 0.7081473581492901,
      "reward_std": 0.28944313153624535,
      "rewards/accuracy_reward": 0.7081473581492901,
      "rewards/format_reward": 0.0,
      "step": 155
    },
    {
      "clip_ratio": 0.0005666015515089385,
      "completion_length": 587.8806037902832,
      "epoch": 0.9097370291400142,
      "grad_norm": 34.70378494262695,
      "kl": 0.88984375,
      "learning_rate": 6.706273436399024e-08,
      "loss": 0.1321,
      "reward": 0.6880580671131611,
      "reward_std": 0.3093922510743141,
      "rewards/accuracy_reward": 0.6880580671131611,
      "rewards/format_reward": 0.0,
      "step": 160
    },
    {
      "clip_ratio": 0.00019017228769371285,
      "completion_length": 611.3214645385742,
      "epoch": 0.9381663113006397,
      "grad_norm": 20.277708053588867,
      "kl": 0.973486328125,
      "learning_rate": 2.993037119295694e-08,
      "loss": 0.1165,
      "reward": 0.746651828289032,
      "reward_std": 0.2960765212774277,
      "rewards/accuracy_reward": 0.746651828289032,
      "rewards/format_reward": 0.0,
      "step": 165
    },
    {
      "clip_ratio": 0.0001261272067495156,
      "completion_length": 583.6373023986816,
      "epoch": 0.9665955934612651,
      "grad_norm": 26.07307243347168,
      "kl": 3.51806640625,
      "learning_rate": 7.501349546579329e-09,
      "loss": 0.3474,
      "reward": 0.7053571753203869,
      "reward_std": 0.27511973679065704,
      "rewards/accuracy_reward": 0.7053571753203869,
      "rewards/format_reward": 0.0,
      "step": 170
    },
    {
      "clip_ratio": 0.00015486263464481453,
      "completion_length": 589.4955673217773,
      "epoch": 0.9893390191897654,
      "kl": 0.896728515625,
      "reward": 0.6707589626312256,
      "reward_std": 0.2826360985636711,
      "rewards/accuracy_reward": 0.6707589626312256,
      "rewards/format_reward": 0.0,
      "step": 174,
      "total_flos": 0.0,
      "train_loss": 0.7242226951338094,
      "train_runtime": 14838.9521,
      "train_samples_per_second": 0.505,
      "train_steps_per_second": 0.012
    }
  ],
  "logging_steps": 5,
  "max_steps": 175,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}