|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9965010496850945, |
|
"eval_steps": 100, |
|
"global_step": 89, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 776.6575088500977, |
|
"epoch": 0.01119664100769769, |
|
"grad_norm": 1.4259217977523804, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4336734637618065, |
|
"reward_std": 0.13708234671503305, |
|
"rewards/accuracy_reward": 0.4336734637618065, |
|
"step": 1 |
|
}, |
|
{ |
|
"completion_length": 775.5478172302246, |
|
"epoch": 0.02239328201539538, |
|
"grad_norm": 1.6899404525756836, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3609693795442581, |
|
"reward_std": 0.12760126357898116, |
|
"rewards/accuracy_reward": 0.3609693795442581, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 770.4444999694824, |
|
"epoch": 0.03358992302309307, |
|
"grad_norm": 1.1099838018417358, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.41772958543151617, |
|
"reward_std": 0.0995770595036447, |
|
"rewards/accuracy_reward": 0.41772958543151617, |
|
"step": 3 |
|
}, |
|
{ |
|
"completion_length": 763.253173828125, |
|
"epoch": 0.04478656403079076, |
|
"grad_norm": 1.734800100326538, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.42602039594203234, |
|
"reward_std": 0.10463267145678401, |
|
"rewards/accuracy_reward": 0.42602039594203234, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 728.0452613830566, |
|
"epoch": 0.05598320503848846, |
|
"grad_norm": 1.9766837358474731, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.412627543322742, |
|
"reward_std": 0.1070892985444516, |
|
"rewards/accuracy_reward": 0.412627543322742, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 771.2640113830566, |
|
"epoch": 0.06717984604618614, |
|
"grad_norm": 1.5028815269470215, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3877550968900323, |
|
"reward_std": 0.11859837244264781, |
|
"rewards/accuracy_reward": 0.3877550968900323, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 744.927921295166, |
|
"epoch": 0.07837648705388384, |
|
"grad_norm": 2.0006868839263916, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.426658152602613, |
|
"reward_std": 0.11621210724115372, |
|
"rewards/accuracy_reward": 0.426658152602613, |
|
"step": 7 |
|
}, |
|
{ |
|
"completion_length": 776.862865447998, |
|
"epoch": 0.08957312806158152, |
|
"grad_norm": 2.4789130687713623, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3558673383668065, |
|
"reward_std": 0.12130605196580291, |
|
"rewards/accuracy_reward": 0.3558673383668065, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 778.5867118835449, |
|
"epoch": 0.10076976906927922, |
|
"grad_norm": 1.8755781650543213, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.41581631638109684, |
|
"reward_std": 0.10032712062820792, |
|
"rewards/accuracy_reward": 0.41581631638109684, |
|
"step": 9 |
|
}, |
|
{ |
|
"completion_length": 749.0809860229492, |
|
"epoch": 0.11196641007697691, |
|
"grad_norm": 2.071565628051758, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4030612176284194, |
|
"reward_std": 0.11739562568254769, |
|
"rewards/accuracy_reward": 0.4030612176284194, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 814.1294441223145, |
|
"epoch": 0.1231630510846746, |
|
"grad_norm": 5.151350021362305, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.37436224054545164, |
|
"reward_std": 0.14050176995806396, |
|
"rewards/accuracy_reward": 0.37436224054545164, |
|
"step": 11 |
|
}, |
|
{ |
|
"completion_length": 757.7876129150391, |
|
"epoch": 0.13435969209237228, |
|
"grad_norm": 1.5203074216842651, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.41709182877093554, |
|
"reward_std": 0.1278091778513044, |
|
"rewards/accuracy_reward": 0.41709182877093554, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 777.004451751709, |
|
"epoch": 0.14555633310007, |
|
"grad_norm": 2.49013614654541, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.42410713247954845, |
|
"reward_std": 0.13054731115698814, |
|
"rewards/accuracy_reward": 0.42410713247954845, |
|
"step": 13 |
|
}, |
|
{ |
|
"completion_length": 757.3265151977539, |
|
"epoch": 0.15675297410776767, |
|
"grad_norm": 3.2910609245300293, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.38201529905200005, |
|
"reward_std": 0.10587374167516828, |
|
"rewards/accuracy_reward": 0.38201529905200005, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 806.7212867736816, |
|
"epoch": 0.16794961511546536, |
|
"grad_norm": 3.803057909011841, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3826530510559678, |
|
"reward_std": 0.1126359230838716, |
|
"rewards/accuracy_reward": 0.3826530510559678, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 783.6211624145508, |
|
"epoch": 0.17914625612316304, |
|
"grad_norm": 5.312828540802002, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3998724380508065, |
|
"reward_std": 0.1370647200383246, |
|
"rewards/accuracy_reward": 0.3998724380508065, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 751.4138832092285, |
|
"epoch": 0.19034289713086075, |
|
"grad_norm": 5.630074977874756, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3985969312489033, |
|
"reward_std": 0.1044071288779378, |
|
"rewards/accuracy_reward": 0.3985969312489033, |
|
"step": 17 |
|
}, |
|
{ |
|
"completion_length": 740.3303375244141, |
|
"epoch": 0.20153953813855843, |
|
"grad_norm": 3.4527852535247803, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.45153060369193554, |
|
"reward_std": 0.13771249912679195, |
|
"rewards/accuracy_reward": 0.45153060369193554, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 762.7110862731934, |
|
"epoch": 0.21273617914625612, |
|
"grad_norm": 2.000288724899292, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.44005101174116135, |
|
"reward_std": 0.11361152515746653, |
|
"rewards/accuracy_reward": 0.44005101174116135, |
|
"step": 19 |
|
}, |
|
{ |
|
"completion_length": 754.8182258605957, |
|
"epoch": 0.22393282015395383, |
|
"grad_norm": 2.939009189605713, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4623724361881614, |
|
"reward_std": 0.1443291292525828, |
|
"rewards/accuracy_reward": 0.4623724361881614, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 804.9897804260254, |
|
"epoch": 0.2351294611616515, |
|
"grad_norm": 3.3701393604278564, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.42219387367367744, |
|
"reward_std": 0.12525973934680223, |
|
"rewards/accuracy_reward": 0.42219387367367744, |
|
"step": 21 |
|
}, |
|
{ |
|
"completion_length": 760.9719276428223, |
|
"epoch": 0.2463261021693492, |
|
"grad_norm": 4.5709004402160645, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4604591690003872, |
|
"reward_std": 0.13864337070845068, |
|
"rewards/accuracy_reward": 0.4604591690003872, |
|
"step": 22 |
|
}, |
|
{ |
|
"completion_length": 747.7378578186035, |
|
"epoch": 0.2575227431770469, |
|
"grad_norm": 4.603999137878418, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4559948891401291, |
|
"reward_std": 0.11444317712448537, |
|
"rewards/accuracy_reward": 0.4559948891401291, |
|
"step": 23 |
|
}, |
|
{ |
|
"completion_length": 776.5950050354004, |
|
"epoch": 0.26871938418474456, |
|
"grad_norm": 3.4199488162994385, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.38711733650416136, |
|
"reward_std": 0.12604024447500706, |
|
"rewards/accuracy_reward": 0.38711733650416136, |
|
"step": 24 |
|
}, |
|
{ |
|
"completion_length": 805.4457778930664, |
|
"epoch": 0.27991602519244224, |
|
"grad_norm": 2.1585614681243896, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.34757652413100004, |
|
"reward_std": 0.09647424682043493, |
|
"rewards/accuracy_reward": 0.34757652413100004, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 756.6740798950195, |
|
"epoch": 0.29111266620014, |
|
"grad_norm": 4.483582973480225, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4100765222683549, |
|
"reward_std": 0.11235282756388187, |
|
"rewards/accuracy_reward": 0.4100765222683549, |
|
"step": 26 |
|
}, |
|
{ |
|
"completion_length": 783.984676361084, |
|
"epoch": 0.30230930720783766, |
|
"grad_norm": 4.353560447692871, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4081632560119033, |
|
"reward_std": 0.12413217849098146, |
|
"rewards/accuracy_reward": 0.4081632560119033, |
|
"step": 27 |
|
}, |
|
{ |
|
"completion_length": 785.7461547851562, |
|
"epoch": 0.31350594821553535, |
|
"grad_norm": 2.4794628620147705, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3628826476633549, |
|
"reward_std": 0.0991243754979223, |
|
"rewards/accuracy_reward": 0.3628826476633549, |
|
"step": 28 |
|
}, |
|
{ |
|
"completion_length": 730.373706817627, |
|
"epoch": 0.32470258922323303, |
|
"grad_norm": 4.818853855133057, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.48596937395632267, |
|
"reward_std": 0.14916561311110854, |
|
"rewards/accuracy_reward": 0.48596937395632267, |
|
"step": 29 |
|
}, |
|
{ |
|
"completion_length": 798.7933502197266, |
|
"epoch": 0.3358992302309307, |
|
"grad_norm": 4.875724792480469, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.35650509130209684, |
|
"reward_std": 0.13356371596455574, |
|
"rewards/accuracy_reward": 0.35650509130209684, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 796.2346839904785, |
|
"epoch": 0.3470958712386284, |
|
"grad_norm": 10.593175888061523, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3966836603358388, |
|
"reward_std": 0.1197371541056782, |
|
"rewards/accuracy_reward": 0.3966836603358388, |
|
"step": 31 |
|
}, |
|
{ |
|
"completion_length": 770.2442512512207, |
|
"epoch": 0.3582925122463261, |
|
"grad_norm": 2.9329323768615723, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4113520346581936, |
|
"reward_std": 0.10089330864138901, |
|
"rewards/accuracy_reward": 0.4113520346581936, |
|
"step": 32 |
|
}, |
|
{ |
|
"completion_length": 775.821418762207, |
|
"epoch": 0.3694891532540238, |
|
"grad_norm": 12.537503242492676, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4483418297022581, |
|
"reward_std": 0.125624421518296, |
|
"rewards/accuracy_reward": 0.4483418297022581, |
|
"step": 33 |
|
}, |
|
{ |
|
"completion_length": 783.8558540344238, |
|
"epoch": 0.3806857942617215, |
|
"grad_norm": 4.941224575042725, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.40752549935132265, |
|
"reward_std": 0.1238795283716172, |
|
"rewards/accuracy_reward": 0.40752549935132265, |
|
"step": 34 |
|
}, |
|
{ |
|
"completion_length": 835.3316078186035, |
|
"epoch": 0.3918824352694192, |
|
"grad_norm": 4.293496608734131, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.36670917458832264, |
|
"reward_std": 0.13812832674011588, |
|
"rewards/accuracy_reward": 0.36670917458832264, |
|
"step": 35 |
|
}, |
|
{ |
|
"completion_length": 800.0905456542969, |
|
"epoch": 0.40307907627711687, |
|
"grad_norm": 19.928516387939453, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4234693804755807, |
|
"reward_std": 0.11749003268778324, |
|
"rewards/accuracy_reward": 0.4234693804755807, |
|
"step": 36 |
|
}, |
|
{ |
|
"completion_length": 767.5561103820801, |
|
"epoch": 0.41427571728481455, |
|
"grad_norm": 6.884653568267822, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4445152971893549, |
|
"reward_std": 0.09231905196793377, |
|
"rewards/accuracy_reward": 0.4445152971893549, |
|
"step": 37 |
|
}, |
|
{ |
|
"completion_length": 805.4872283935547, |
|
"epoch": 0.42547235829251223, |
|
"grad_norm": 24.017255783081055, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.40114795323461294, |
|
"reward_std": 0.10263178893364966, |
|
"rewards/accuracy_reward": 0.40114795323461294, |
|
"step": 38 |
|
}, |
|
{ |
|
"completion_length": 801.1536827087402, |
|
"epoch": 0.4366689993002099, |
|
"grad_norm": 5.969362258911133, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.39604590833187103, |
|
"reward_std": 0.10426798998378217, |
|
"rewards/accuracy_reward": 0.39604590833187103, |
|
"step": 39 |
|
}, |
|
{ |
|
"completion_length": 778.0669441223145, |
|
"epoch": 0.44786564030790765, |
|
"grad_norm": 7.667646884918213, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.44068876653909683, |
|
"reward_std": 0.13503032876178622, |
|
"rewards/accuracy_reward": 0.44068876653909683, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 777.1989631652832, |
|
"epoch": 0.45906228131560534, |
|
"grad_norm": 8.530767440795898, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.45854590740054846, |
|
"reward_std": 0.1331926230341196, |
|
"rewards/accuracy_reward": 0.45854590740054846, |
|
"step": 41 |
|
}, |
|
{ |
|
"completion_length": 709.0452651977539, |
|
"epoch": 0.470258922323303, |
|
"grad_norm": 2.948634386062622, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4279336668550968, |
|
"reward_std": 0.13054730370640755, |
|
"rewards/accuracy_reward": 0.4279336668550968, |
|
"step": 42 |
|
}, |
|
{ |
|
"completion_length": 812.6842880249023, |
|
"epoch": 0.4814555633310007, |
|
"grad_norm": 63.21303939819336, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4056122303009033, |
|
"reward_std": 0.11122526740655303, |
|
"rewards/accuracy_reward": 0.4056122303009033, |
|
"step": 43 |
|
}, |
|
{ |
|
"completion_length": 794.6052093505859, |
|
"epoch": 0.4926522043386984, |
|
"grad_norm": 36.811729431152344, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4387754984200001, |
|
"reward_std": 0.11845282535068691, |
|
"rewards/accuracy_reward": 0.4387754984200001, |
|
"step": 44 |
|
}, |
|
{ |
|
"completion_length": 774.257640838623, |
|
"epoch": 0.5038488453463961, |
|
"grad_norm": 15.306803703308105, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4419642761349678, |
|
"reward_std": 0.12792909424751997, |
|
"rewards/accuracy_reward": 0.4419642761349678, |
|
"step": 45 |
|
}, |
|
{ |
|
"completion_length": 809.9763870239258, |
|
"epoch": 0.5150454863540938, |
|
"grad_norm": 18.062774658203125, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.39158162381500006, |
|
"reward_std": 0.1342562234494835, |
|
"rewards/accuracy_reward": 0.39158162381500006, |
|
"step": 46 |
|
}, |
|
{ |
|
"completion_length": 801.1211700439453, |
|
"epoch": 0.5262421273617914, |
|
"grad_norm": 14.511787414550781, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.40752550307661295, |
|
"reward_std": 0.13466564589180052, |
|
"rewards/accuracy_reward": 0.40752550307661295, |
|
"step": 47 |
|
}, |
|
{ |
|
"completion_length": 799.5650329589844, |
|
"epoch": 0.5374387683694891, |
|
"grad_norm": 11.552735328674316, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3839285643771291, |
|
"reward_std": 0.12077671871520579, |
|
"rewards/accuracy_reward": 0.3839285643771291, |
|
"step": 48 |
|
}, |
|
{ |
|
"completion_length": 796.6683540344238, |
|
"epoch": 0.5486354093771868, |
|
"grad_norm": 13.472460746765137, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3966836668550968, |
|
"reward_std": 0.12616657256148756, |
|
"rewards/accuracy_reward": 0.3966836668550968, |
|
"step": 49 |
|
}, |
|
{ |
|
"completion_length": 750.8067474365234, |
|
"epoch": 0.5598320503848845, |
|
"grad_norm": 13.930930137634277, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4438775470480323, |
|
"reward_std": 0.1101488508284092, |
|
"rewards/accuracy_reward": 0.4438775470480323, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 780.1970520019531, |
|
"epoch": 0.5710286913925823, |
|
"grad_norm": 5.438718795776367, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4017857061699033, |
|
"reward_std": 0.09486208576709032, |
|
"rewards/accuracy_reward": 0.4017857061699033, |
|
"step": 51 |
|
}, |
|
{ |
|
"completion_length": 772.9202728271484, |
|
"epoch": 0.58222533240028, |
|
"grad_norm": 6.467463493347168, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.39795918203890324, |
|
"reward_std": 0.11394576611928642, |
|
"rewards/accuracy_reward": 0.39795918203890324, |
|
"step": 52 |
|
}, |
|
{ |
|
"completion_length": 751.3756217956543, |
|
"epoch": 0.5934219734079776, |
|
"grad_norm": 7.1286139488220215, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.5063775442540646, |
|
"reward_std": 0.152797756716609, |
|
"rewards/accuracy_reward": 0.5063775442540646, |
|
"step": 53 |
|
}, |
|
{ |
|
"completion_length": 773.2212867736816, |
|
"epoch": 0.6046186144156753, |
|
"grad_norm": 5.8168840408325195, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3852040721103549, |
|
"reward_std": 0.11522368853911757, |
|
"rewards/accuracy_reward": 0.3852040721103549, |
|
"step": 54 |
|
}, |
|
{ |
|
"completion_length": 720.9132423400879, |
|
"epoch": 0.615815255423373, |
|
"grad_norm": 4.343114852905273, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4336734600365162, |
|
"reward_std": 0.10637756483629346, |
|
"rewards/accuracy_reward": 0.4336734600365162, |
|
"step": 55 |
|
}, |
|
{ |
|
"completion_length": 769.654956817627, |
|
"epoch": 0.6270118964310707, |
|
"grad_norm": 4.029993534088135, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4202806008979678, |
|
"reward_std": 0.13102709129452705, |
|
"rewards/accuracy_reward": 0.4202806008979678, |
|
"step": 56 |
|
}, |
|
{ |
|
"completion_length": 750.3571243286133, |
|
"epoch": 0.6382085374387684, |
|
"grad_norm": 2.320124864578247, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4547193767502904, |
|
"reward_std": 0.12525333184748888, |
|
"rewards/accuracy_reward": 0.4547193767502904, |
|
"step": 57 |
|
}, |
|
{ |
|
"completion_length": 756.4540672302246, |
|
"epoch": 0.6494051784464661, |
|
"grad_norm": 5.077786922454834, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.42793366126716137, |
|
"reward_std": 0.1169798003975302, |
|
"rewards/accuracy_reward": 0.42793366126716137, |
|
"step": 58 |
|
}, |
|
{ |
|
"completion_length": 741.6307258605957, |
|
"epoch": 0.6606018194541637, |
|
"grad_norm": 3.857607364654541, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.398596934042871, |
|
"reward_std": 0.11093576485291123, |
|
"rewards/accuracy_reward": 0.398596934042871, |
|
"step": 59 |
|
}, |
|
{ |
|
"completion_length": 779.0318717956543, |
|
"epoch": 0.6717984604618614, |
|
"grad_norm": 2.3024513721466064, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.40624999161809683, |
|
"reward_std": 0.11923973984085023, |
|
"rewards/accuracy_reward": 0.40624999161809683, |
|
"step": 60 |
|
}, |
|
{ |
|
"completion_length": 814.8584022521973, |
|
"epoch": 0.6829951014695591, |
|
"grad_norm": 4.7456374168396, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.36415816005319357, |
|
"reward_std": 0.13503673416562378, |
|
"rewards/accuracy_reward": 0.36415816005319357, |
|
"step": 61 |
|
}, |
|
{ |
|
"completion_length": 744.0248603820801, |
|
"epoch": 0.6941917424772568, |
|
"grad_norm": 7.634659767150879, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4336734553799033, |
|
"reward_std": 0.13652257318608463, |
|
"rewards/accuracy_reward": 0.4336734553799033, |
|
"step": 62 |
|
}, |
|
{ |
|
"completion_length": 748.6332778930664, |
|
"epoch": 0.7053883834849545, |
|
"grad_norm": 5.460710525512695, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.45854590460658073, |
|
"reward_std": 0.10201446153223515, |
|
"rewards/accuracy_reward": 0.45854590460658073, |
|
"step": 63 |
|
}, |
|
{ |
|
"completion_length": 766.7691230773926, |
|
"epoch": 0.7165850244926522, |
|
"grad_norm": 10.574377059936523, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.43558672722429037, |
|
"reward_std": 0.11734448280185461, |
|
"rewards/accuracy_reward": 0.43558672722429037, |
|
"step": 64 |
|
}, |
|
{ |
|
"completion_length": 774.1874847412109, |
|
"epoch": 0.72778166550035, |
|
"grad_norm": 13.465957641601562, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4088010136038065, |
|
"reward_std": 0.1347600498702377, |
|
"rewards/accuracy_reward": 0.4088010136038065, |
|
"step": 65 |
|
}, |
|
{ |
|
"completion_length": 767.0720520019531, |
|
"epoch": 0.7389783065080476, |
|
"grad_norm": 18.965850830078125, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3392857080325484, |
|
"reward_std": 0.10657906858250499, |
|
"rewards/accuracy_reward": 0.3392857080325484, |
|
"step": 66 |
|
}, |
|
{ |
|
"completion_length": 743.1543273925781, |
|
"epoch": 0.7501749475157453, |
|
"grad_norm": 4.266128063201904, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4547193758189678, |
|
"reward_std": 0.14951107138767838, |
|
"rewards/accuracy_reward": 0.4547193758189678, |
|
"step": 67 |
|
}, |
|
{ |
|
"completion_length": 746.9189910888672, |
|
"epoch": 0.761371588523443, |
|
"grad_norm": 7.374969959259033, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.43048468325287104, |
|
"reward_std": 0.13727903924882412, |
|
"rewards/accuracy_reward": 0.43048468325287104, |
|
"step": 68 |
|
}, |
|
{ |
|
"completion_length": 772.3635101318359, |
|
"epoch": 0.7725682295311407, |
|
"grad_norm": 2.557593822479248, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4036989789456129, |
|
"reward_std": 0.12583233416080475, |
|
"rewards/accuracy_reward": 0.4036989789456129, |
|
"step": 69 |
|
}, |
|
{ |
|
"completion_length": 725.642204284668, |
|
"epoch": 0.7837648705388384, |
|
"grad_norm": 2.71532940864563, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.45854590460658073, |
|
"reward_std": 0.13771890476346016, |
|
"rewards/accuracy_reward": 0.45854590460658073, |
|
"step": 70 |
|
}, |
|
{ |
|
"completion_length": 762.9929695129395, |
|
"epoch": 0.794961511546536, |
|
"grad_norm": 6.598220348358154, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.38392856158316135, |
|
"reward_std": 0.1016802228987217, |
|
"rewards/accuracy_reward": 0.38392856158316135, |
|
"step": 71 |
|
}, |
|
{ |
|
"completion_length": 734.3813591003418, |
|
"epoch": 0.8061581525542337, |
|
"grad_norm": 4.22649621963501, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4272959092631936, |
|
"reward_std": 0.09739230386912823, |
|
"rewards/accuracy_reward": 0.4272959092631936, |
|
"step": 72 |
|
}, |
|
{ |
|
"completion_length": 756.7136306762695, |
|
"epoch": 0.8173547935619314, |
|
"grad_norm": 5.3303704261779785, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4713010126724839, |
|
"reward_std": 0.13973407400771976, |
|
"rewards/accuracy_reward": 0.4713010126724839, |
|
"step": 73 |
|
}, |
|
{ |
|
"completion_length": 764.8596725463867, |
|
"epoch": 0.8285514345696291, |
|
"grad_norm": 4.296087265014648, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.37436223961412907, |
|
"reward_std": 0.1403882629238069, |
|
"rewards/accuracy_reward": 0.37436223961412907, |
|
"step": 74 |
|
}, |
|
{ |
|
"completion_length": 703.3316268920898, |
|
"epoch": 0.8397480755773268, |
|
"grad_norm": 6.208092212677002, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4279336668550968, |
|
"reward_std": 0.09254459687508643, |
|
"rewards/accuracy_reward": 0.4279336668550968, |
|
"step": 75 |
|
}, |
|
{ |
|
"completion_length": 816.643482208252, |
|
"epoch": 0.8509447165850245, |
|
"grad_norm": 5.470515727996826, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4081632560119033, |
|
"reward_std": 0.12125490978360176, |
|
"rewards/accuracy_reward": 0.4081632560119033, |
|
"step": 76 |
|
}, |
|
{ |
|
"completion_length": 741.5395240783691, |
|
"epoch": 0.8621413575927221, |
|
"grad_norm": 7.999130725860596, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4687499925494194, |
|
"reward_std": 0.12569960486143827, |
|
"rewards/accuracy_reward": 0.4687499925494194, |
|
"step": 77 |
|
}, |
|
{ |
|
"completion_length": 758.1237106323242, |
|
"epoch": 0.8733379986004198, |
|
"grad_norm": 14.48597526550293, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3903061128221452, |
|
"reward_std": 0.1084871394559741, |
|
"rewards/accuracy_reward": 0.3903061128221452, |
|
"step": 78 |
|
}, |
|
{ |
|
"completion_length": 761.9495964050293, |
|
"epoch": 0.8845346396081175, |
|
"grad_norm": 9.992022514343262, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.3998724417760968, |
|
"reward_std": 0.10686216223984957, |
|
"rewards/accuracy_reward": 0.3998724417760968, |
|
"step": 79 |
|
}, |
|
{ |
|
"completion_length": 755.2149085998535, |
|
"epoch": 0.8957312806158153, |
|
"grad_norm": 1.8447043895721436, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.422193868085742, |
|
"reward_std": 0.12513341289013624, |
|
"rewards/accuracy_reward": 0.422193868085742, |
|
"step": 80 |
|
}, |
|
{ |
|
"completion_length": 787.5254936218262, |
|
"epoch": 0.906927921623513, |
|
"grad_norm": 2.829308271408081, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.45408162102103233, |
|
"reward_std": 0.11606655921787024, |
|
"rewards/accuracy_reward": 0.45408162102103233, |
|
"step": 81 |
|
}, |
|
{ |
|
"completion_length": 750.6084022521973, |
|
"epoch": 0.9181245626312107, |
|
"grad_norm": 9.926461219787598, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.41709183249622583, |
|
"reward_std": 0.11887505534105003, |
|
"rewards/accuracy_reward": 0.41709183249622583, |
|
"step": 82 |
|
}, |
|
{ |
|
"completion_length": 791.3246002197266, |
|
"epoch": 0.9293212036389084, |
|
"grad_norm": 8.353261947631836, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.37882652413100004, |
|
"reward_std": 0.0963351079262793, |
|
"rewards/accuracy_reward": 0.37882652413100004, |
|
"step": 83 |
|
}, |
|
{ |
|
"completion_length": 731.2653007507324, |
|
"epoch": 0.940517844646606, |
|
"grad_norm": 54.08208465576172, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.415816318243742, |
|
"reward_std": 0.14536869549192488, |
|
"rewards/accuracy_reward": 0.415816318243742, |
|
"step": 84 |
|
}, |
|
{ |
|
"completion_length": 743.2423324584961, |
|
"epoch": 0.9517144856543037, |
|
"grad_norm": 3.901226043701172, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.4451530482620001, |
|
"reward_std": 0.1309119921643287, |
|
"rewards/accuracy_reward": 0.4451530482620001, |
|
"step": 85 |
|
}, |
|
{ |
|
"completion_length": 751.2391338348389, |
|
"epoch": 0.9629111266620014, |
|
"grad_norm": 5.942202568054199, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.43367346189916134, |
|
"reward_std": 0.10790172568522394, |
|
"rewards/accuracy_reward": 0.43367346189916134, |
|
"step": 86 |
|
}, |
|
{ |
|
"completion_length": 719.3335266113281, |
|
"epoch": 0.9741077676696991, |
|
"grad_norm": 5.666225433349609, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.44451529532670975, |
|
"reward_std": 0.11002893466502428, |
|
"rewards/accuracy_reward": 0.44451529532670975, |
|
"step": 87 |
|
}, |
|
{ |
|
"completion_length": 752.9968070983887, |
|
"epoch": 0.9853044086773968, |
|
"grad_norm": 10.701108932495117, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.411352033726871, |
|
"reward_std": 0.11254792334511876, |
|
"rewards/accuracy_reward": 0.411352033726871, |
|
"step": 88 |
|
}, |
|
{ |
|
"completion_length": 763.3092956542969, |
|
"epoch": 0.9965010496850945, |
|
"grad_norm": 7.832119941711426, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0, |
|
"reward": 0.40051019471138716, |
|
"reward_std": 0.10468381433747709, |
|
"rewards/accuracy_reward": 0.40051019471138716, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.9965010496850945, |
|
"step": 89, |
|
"total_flos": 0.0, |
|
"train_loss": 2.155859272374791e-08, |
|
"train_runtime": 56373.7111, |
|
"train_samples_per_second": 0.355, |
|
"train_steps_per_second": 0.002 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 89, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 20, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|