|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9965010496850945, |
|
"eval_steps": 100, |
|
"global_step": 178, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 621.5997104644775, |
|
"epoch": 0.005598320503848845, |
|
"grad_norm": 0.01685222238302231, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21130952710518613, |
|
"reward_std": 0.1508616954088211, |
|
"rewards/accuracy_reward": 0.21130952710518613, |
|
"step": 1 |
|
}, |
|
{ |
|
"completion_length": 593.0461368560791, |
|
"epoch": 0.01119664100769769, |
|
"grad_norm": 0.017173435539007187, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21502976398915052, |
|
"reward_std": 0.1765604354441166, |
|
"rewards/accuracy_reward": 0.21502976398915052, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 625.5074501037598, |
|
"epoch": 0.016794961511546535, |
|
"grad_norm": 0.013760975562036037, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15401786100119352, |
|
"reward_std": 0.13073960039764643, |
|
"rewards/accuracy_reward": 0.15401786100119352, |
|
"step": 3 |
|
}, |
|
{ |
|
"completion_length": 647.694206237793, |
|
"epoch": 0.02239328201539538, |
|
"grad_norm": 0.015759378671646118, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1808035772992298, |
|
"reward_std": 0.1654246775433421, |
|
"rewards/accuracy_reward": 0.1808035772992298, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 629.4486751556396, |
|
"epoch": 0.02799160251924423, |
|
"grad_norm": 0.014322967268526554, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.23883929010480642, |
|
"reward_std": 0.15154671855270863, |
|
"rewards/accuracy_reward": 0.23883929010480642, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 604.1823024749756, |
|
"epoch": 0.03358992302309307, |
|
"grad_norm": 0.014635809697210789, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2194940543267876, |
|
"reward_std": 0.1607616674154997, |
|
"rewards/accuracy_reward": 0.2194940543267876, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 659.3504657745361, |
|
"epoch": 0.03918824352694192, |
|
"grad_norm": 0.015097221359610558, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16666666860692203, |
|
"reward_std": 0.13727260008454323, |
|
"rewards/accuracy_reward": 0.16666666860692203, |
|
"step": 7 |
|
}, |
|
{ |
|
"completion_length": 587.1049137115479, |
|
"epoch": 0.04478656403079076, |
|
"grad_norm": 0.01467145700007677, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16815476398915052, |
|
"reward_std": 0.15577294118702412, |
|
"rewards/accuracy_reward": 0.16815476398915052, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 675.1614761352539, |
|
"epoch": 0.05038488453463961, |
|
"grad_norm": 0.015891388058662415, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1755952414823696, |
|
"reward_std": 0.16377168311737478, |
|
"rewards/accuracy_reward": 0.1755952414823696, |
|
"step": 9 |
|
}, |
|
{ |
|
"completion_length": 651.4523887634277, |
|
"epoch": 0.05598320503848846, |
|
"grad_norm": 0.014163345098495483, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.18080357555299997, |
|
"reward_std": 0.13789755944162607, |
|
"rewards/accuracy_reward": 0.18080357555299997, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 652.5982303619385, |
|
"epoch": 0.0615815255423373, |
|
"grad_norm": 0.013874271884560585, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17187500558793545, |
|
"reward_std": 0.1334617892280221, |
|
"rewards/accuracy_reward": 0.17187500558793545, |
|
"step": 11 |
|
}, |
|
{ |
|
"completion_length": 629.9330501556396, |
|
"epoch": 0.06717984604618614, |
|
"grad_norm": 0.015252678655087948, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15997024276293814, |
|
"reward_std": 0.13702514953911304, |
|
"rewards/accuracy_reward": 0.15997024276293814, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 635.7953948974609, |
|
"epoch": 0.072778166550035, |
|
"grad_norm": 0.014308245852589607, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1398809556849301, |
|
"reward_std": 0.13629459426738322, |
|
"rewards/accuracy_reward": 0.1398809556849301, |
|
"step": 13 |
|
}, |
|
{ |
|
"completion_length": 655.9814014434814, |
|
"epoch": 0.07837648705388384, |
|
"grad_norm": 0.014390479773283005, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17485119425691664, |
|
"reward_std": 0.14875314245000482, |
|
"rewards/accuracy_reward": 0.17485119425691664, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 656.0022430419922, |
|
"epoch": 0.08397480755773268, |
|
"grad_norm": 0.017086399719119072, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17633928690338507, |
|
"reward_std": 0.13736709533259273, |
|
"rewards/accuracy_reward": 0.17633928690338507, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 578.8586463928223, |
|
"epoch": 0.08957312806158152, |
|
"grad_norm": 0.015446292236447334, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21205357555299997, |
|
"reward_std": 0.15961862821131945, |
|
"rewards/accuracy_reward": 0.21205357555299997, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 624.680814743042, |
|
"epoch": 0.09517144856543037, |
|
"grad_norm": 0.016500283032655716, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.18601190892513841, |
|
"reward_std": 0.14123531756922603, |
|
"rewards/accuracy_reward": 0.18601190892513841, |
|
"step": 17 |
|
}, |
|
{ |
|
"completion_length": 661.7500152587891, |
|
"epoch": 0.10076976906927922, |
|
"grad_norm": 0.014794868417084217, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.18229167046956718, |
|
"reward_std": 0.15161667275242507, |
|
"rewards/accuracy_reward": 0.18229167046956718, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 632.3244209289551, |
|
"epoch": 0.10636808957312806, |
|
"grad_norm": 0.01493909489363432, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1636904807528481, |
|
"reward_std": 0.1566695892252028, |
|
"rewards/accuracy_reward": 0.1636904807528481, |
|
"step": 19 |
|
}, |
|
{ |
|
"completion_length": 627.7477836608887, |
|
"epoch": 0.11196641007697691, |
|
"grad_norm": 0.01570167765021324, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2046130986418575, |
|
"reward_std": 0.14942223858088255, |
|
"rewards/accuracy_reward": 0.2046130986418575, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 625.4613227844238, |
|
"epoch": 0.11756473058082575, |
|
"grad_norm": 0.016287673264741898, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.22991071827709675, |
|
"reward_std": 0.1482053459621966, |
|
"rewards/accuracy_reward": 0.22991071827709675, |
|
"step": 21 |
|
}, |
|
{ |
|
"completion_length": 695.4895973205566, |
|
"epoch": 0.1231630510846746, |
|
"grad_norm": 0.013338581658899784, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.129464287601877, |
|
"reward_std": 0.13608421152457595, |
|
"rewards/accuracy_reward": 0.129464287601877, |
|
"step": 22 |
|
}, |
|
{ |
|
"completion_length": 666.1235198974609, |
|
"epoch": 0.12876137158852344, |
|
"grad_norm": 0.015841346234083176, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.20386905269697309, |
|
"reward_std": 0.14255746873095632, |
|
"rewards/accuracy_reward": 0.20386905269697309, |
|
"step": 23 |
|
}, |
|
{ |
|
"completion_length": 649.9732265472412, |
|
"epoch": 0.13435969209237228, |
|
"grad_norm": 0.016053833067417145, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17038690764456987, |
|
"reward_std": 0.12253910815343261, |
|
"rewards/accuracy_reward": 0.17038690764456987, |
|
"step": 24 |
|
}, |
|
{ |
|
"completion_length": 613.6108741760254, |
|
"epoch": 0.13995801259622112, |
|
"grad_norm": 0.020091773942112923, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.22470238478854299, |
|
"reward_std": 0.13420080952346325, |
|
"rewards/accuracy_reward": 0.22470238478854299, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 629.3846836090088, |
|
"epoch": 0.14555633310007, |
|
"grad_norm": 0.016363132745027542, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19642857508733869, |
|
"reward_std": 0.14178543630987406, |
|
"rewards/accuracy_reward": 0.19642857508733869, |
|
"step": 26 |
|
}, |
|
{ |
|
"completion_length": 641.9903411865234, |
|
"epoch": 0.15115465360391883, |
|
"grad_norm": 0.016413498669862747, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19642857648432255, |
|
"reward_std": 0.16177992545999587, |
|
"rewards/accuracy_reward": 0.19642857648432255, |
|
"step": 27 |
|
}, |
|
{ |
|
"completion_length": 647.2522392272949, |
|
"epoch": 0.15675297410776767, |
|
"grad_norm": 0.015543129295110703, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19345238601090387, |
|
"reward_std": 0.13141631055623293, |
|
"rewards/accuracy_reward": 0.19345238601090387, |
|
"step": 28 |
|
}, |
|
{ |
|
"completion_length": 635.9523849487305, |
|
"epoch": 0.16235129461161651, |
|
"grad_norm": 0.02027256041765213, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.22023809660458937, |
|
"reward_std": 0.12436413252726197, |
|
"rewards/accuracy_reward": 0.22023809660458937, |
|
"step": 29 |
|
}, |
|
{ |
|
"completion_length": 658.7291736602783, |
|
"epoch": 0.16794961511546536, |
|
"grad_norm": 0.019577788189053535, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19642857584403828, |
|
"reward_std": 0.161903060041368, |
|
"rewards/accuracy_reward": 0.19642857584403828, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 666.1145973205566, |
|
"epoch": 0.1735479356193142, |
|
"grad_norm": 0.016409732401371002, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19494048005435616, |
|
"reward_std": 0.12808324676007032, |
|
"rewards/accuracy_reward": 0.19494048005435616, |
|
"step": 31 |
|
}, |
|
{ |
|
"completion_length": 662.9114723205566, |
|
"epoch": 0.17914625612316304, |
|
"grad_norm": 0.02575683780014515, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1733630993985571, |
|
"reward_std": 0.14043171424418688, |
|
"rewards/accuracy_reward": 0.1733630993985571, |
|
"step": 32 |
|
}, |
|
{ |
|
"completion_length": 672.6763496398926, |
|
"epoch": 0.1847445766270119, |
|
"grad_norm": 0.02134682983160019, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1979166700039059, |
|
"reward_std": 0.1591111128218472, |
|
"rewards/accuracy_reward": 0.1979166700039059, |
|
"step": 33 |
|
}, |
|
{ |
|
"completion_length": 640.2604217529297, |
|
"epoch": 0.19034289713086075, |
|
"grad_norm": 0.023335738107562065, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.22172619309276342, |
|
"reward_std": 0.16011474281549454, |
|
"rewards/accuracy_reward": 0.22172619309276342, |
|
"step": 34 |
|
}, |
|
{ |
|
"completion_length": 679.5119152069092, |
|
"epoch": 0.1959412176347096, |
|
"grad_norm": 0.026544196531176567, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21651785960420966, |
|
"reward_std": 0.1384496740065515, |
|
"rewards/accuracy_reward": 0.21651785960420966, |
|
"step": 35 |
|
}, |
|
{ |
|
"completion_length": 644.0104331970215, |
|
"epoch": 0.20153953813855843, |
|
"grad_norm": 0.02789580263197422, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.23065476911142468, |
|
"reward_std": 0.14074094174429774, |
|
"rewards/accuracy_reward": 0.23065476911142468, |
|
"step": 36 |
|
}, |
|
{ |
|
"completion_length": 686.8742713928223, |
|
"epoch": 0.20713785864240727, |
|
"grad_norm": 0.02931089699268341, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2165178587893024, |
|
"reward_std": 0.1497473274357617, |
|
"rewards/accuracy_reward": 0.2165178587893024, |
|
"step": 37 |
|
}, |
|
{ |
|
"completion_length": 707.8519477844238, |
|
"epoch": 0.21273617914625612, |
|
"grad_norm": 0.02348598837852478, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17857143143191934, |
|
"reward_std": 0.12875390285626054, |
|
"rewards/accuracy_reward": 0.17857143143191934, |
|
"step": 38 |
|
}, |
|
{ |
|
"completion_length": 739.4196586608887, |
|
"epoch": 0.21833449965010496, |
|
"grad_norm": 0.025258367881178856, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16294643242144957, |
|
"reward_std": 0.10602654609829187, |
|
"rewards/accuracy_reward": 0.16294643242144957, |
|
"step": 39 |
|
}, |
|
{ |
|
"completion_length": 781.2157859802246, |
|
"epoch": 0.22393282015395383, |
|
"grad_norm": 0.02543455921113491, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17038690770277753, |
|
"reward_std": 0.1598060461692512, |
|
"rewards/accuracy_reward": 0.17038690770277753, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 806.7745704650879, |
|
"epoch": 0.22953114065780267, |
|
"grad_norm": 0.02464163675904274, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1443452417734079, |
|
"reward_std": 0.13860655017197132, |
|
"rewards/accuracy_reward": 0.1443452417734079, |
|
"step": 41 |
|
}, |
|
{ |
|
"completion_length": 797.2678718566895, |
|
"epoch": 0.2351294611616515, |
|
"grad_norm": 0.022209392860531807, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1465773843228817, |
|
"reward_std": 0.1135863265953958, |
|
"rewards/accuracy_reward": 0.1465773843228817, |
|
"step": 42 |
|
}, |
|
{ |
|
"completion_length": 800.4032897949219, |
|
"epoch": 0.24072778166550035, |
|
"grad_norm": 0.027684131637215614, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1502976210322231, |
|
"reward_std": 0.14116185018792748, |
|
"rewards/accuracy_reward": 0.1502976210322231, |
|
"step": 43 |
|
}, |
|
{ |
|
"completion_length": 802.7254638671875, |
|
"epoch": 0.2463261021693492, |
|
"grad_norm": 0.02655387856066227, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16294643009314314, |
|
"reward_std": 0.11001868033781648, |
|
"rewards/accuracy_reward": 0.16294643009314314, |
|
"step": 44 |
|
}, |
|
{ |
|
"completion_length": 825.2031326293945, |
|
"epoch": 0.25192442267319803, |
|
"grad_norm": 0.02530819922685623, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16889881389215589, |
|
"reward_std": 0.12393559236079454, |
|
"rewards/accuracy_reward": 0.16889881389215589, |
|
"step": 45 |
|
}, |
|
{ |
|
"completion_length": 809.6086502075195, |
|
"epoch": 0.2575227431770469, |
|
"grad_norm": 0.027012314647436142, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1339285749127157, |
|
"reward_std": 0.12476211739704013, |
|
"rewards/accuracy_reward": 0.1339285749127157, |
|
"step": 46 |
|
}, |
|
{ |
|
"completion_length": 822.8742713928223, |
|
"epoch": 0.2631210636808957, |
|
"grad_norm": 0.027949512004852295, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1458333374466747, |
|
"reward_std": 0.12774629099294543, |
|
"rewards/accuracy_reward": 0.1458333374466747, |
|
"step": 47 |
|
}, |
|
{ |
|
"completion_length": 830.1458473205566, |
|
"epoch": 0.26871938418474456, |
|
"grad_norm": 0.040896009653806686, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.09077381109818816, |
|
"reward_std": 0.09958610264584422, |
|
"rewards/accuracy_reward": 0.09077381109818816, |
|
"step": 48 |
|
}, |
|
{ |
|
"completion_length": 858.4375228881836, |
|
"epoch": 0.2743177046885934, |
|
"grad_norm": 0.025837421417236328, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.12276786024449393, |
|
"reward_std": 0.10703055281192064, |
|
"rewards/accuracy_reward": 0.12276786024449393, |
|
"step": 49 |
|
}, |
|
{ |
|
"completion_length": 837.8504638671875, |
|
"epoch": 0.27991602519244224, |
|
"grad_norm": 0.029402956366539, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.10639881080714986, |
|
"reward_std": 0.09327335562556982, |
|
"rewards/accuracy_reward": 0.10639881080714986, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 795.3244209289551, |
|
"epoch": 0.28551434569629114, |
|
"grad_norm": 0.035774268209934235, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1502976214978844, |
|
"reward_std": 0.15580066060647368, |
|
"rewards/accuracy_reward": 0.1502976214978844, |
|
"step": 51 |
|
}, |
|
{ |
|
"completion_length": 804.2485198974609, |
|
"epoch": 0.29111266620014, |
|
"grad_norm": 0.028347313404083252, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.13764881319366395, |
|
"reward_std": 0.10927183693274856, |
|
"rewards/accuracy_reward": 0.13764881319366395, |
|
"step": 52 |
|
}, |
|
{ |
|
"completion_length": 806.589298248291, |
|
"epoch": 0.2967109867039888, |
|
"grad_norm": 0.04255390539765358, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16815476428018883, |
|
"reward_std": 0.11745391692966223, |
|
"rewards/accuracy_reward": 0.16815476428018883, |
|
"step": 53 |
|
}, |
|
{ |
|
"completion_length": 827.7753105163574, |
|
"epoch": 0.30230930720783766, |
|
"grad_norm": 0.028443966060876846, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.13690476393094286, |
|
"reward_std": 0.09341412922367454, |
|
"rewards/accuracy_reward": 0.13690476393094286, |
|
"step": 54 |
|
}, |
|
{ |
|
"completion_length": 793.2686157226562, |
|
"epoch": 0.3079076277116865, |
|
"grad_norm": 0.06588064879179001, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.14434524159878492, |
|
"reward_std": 0.14150456665083766, |
|
"rewards/accuracy_reward": 0.14434524159878492, |
|
"step": 55 |
|
}, |
|
{ |
|
"completion_length": 808.3698043823242, |
|
"epoch": 0.31350594821553535, |
|
"grad_norm": 0.07028539478778839, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15476190706249326, |
|
"reward_std": 0.08559148758649826, |
|
"rewards/accuracy_reward": 0.15476190706249326, |
|
"step": 56 |
|
}, |
|
{ |
|
"completion_length": 750.5506172180176, |
|
"epoch": 0.3191042687193842, |
|
"grad_norm": 0.20404821634292603, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.25000000395812094, |
|
"reward_std": 0.15004885476082563, |
|
"rewards/accuracy_reward": 0.25000000395812094, |
|
"step": 57 |
|
}, |
|
{ |
|
"completion_length": 802.5141525268555, |
|
"epoch": 0.32470258922323303, |
|
"grad_norm": 0.30963581800460815, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15848214778816327, |
|
"reward_std": 0.13095595594495535, |
|
"rewards/accuracy_reward": 0.15848214778816327, |
|
"step": 58 |
|
}, |
|
{ |
|
"completion_length": 817.5855865478516, |
|
"epoch": 0.33030090972708187, |
|
"grad_norm": 0.4757382571697235, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.08258928690338507, |
|
"reward_std": 0.09397146827541292, |
|
"rewards/accuracy_reward": 0.08258928690338507, |
|
"step": 59 |
|
}, |
|
{ |
|
"completion_length": 780.9456977844238, |
|
"epoch": 0.3358992302309307, |
|
"grad_norm": 0.9979881644248962, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.14136905007762834, |
|
"reward_std": 0.10418214229866862, |
|
"rewards/accuracy_reward": 0.14136905007762834, |
|
"step": 60 |
|
}, |
|
{ |
|
"completion_length": 806.7462959289551, |
|
"epoch": 0.34149755073477955, |
|
"grad_norm": 2.2850308418273926, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16741071856813505, |
|
"reward_std": 0.12904326571151614, |
|
"rewards/accuracy_reward": 0.16741071856813505, |
|
"step": 61 |
|
}, |
|
{ |
|
"completion_length": 796.9077606201172, |
|
"epoch": 0.3470958712386284, |
|
"grad_norm": 3.0867838859558105, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1555059568490833, |
|
"reward_std": 0.1193866366520524, |
|
"rewards/accuracy_reward": 0.1555059568490833, |
|
"step": 62 |
|
}, |
|
{ |
|
"completion_length": 815.139892578125, |
|
"epoch": 0.35269419174247724, |
|
"grad_norm": 9.1638822555542, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.12425595556851476, |
|
"reward_std": 0.11769626522436738, |
|
"rewards/accuracy_reward": 0.12425595556851476, |
|
"step": 63 |
|
}, |
|
{ |
|
"completion_length": 811.334831237793, |
|
"epoch": 0.3582925122463261, |
|
"grad_norm": 9.835746765136719, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.13467262114863843, |
|
"reward_std": 0.10124374250881374, |
|
"rewards/accuracy_reward": 0.13467262114863843, |
|
"step": 64 |
|
}, |
|
{ |
|
"completion_length": 816.058048248291, |
|
"epoch": 0.363890832750175, |
|
"grad_norm": 12.197829246520996, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.13467262114863843, |
|
"reward_std": 0.11615834524855018, |
|
"rewards/accuracy_reward": 0.13467262114863843, |
|
"step": 65 |
|
}, |
|
{ |
|
"completion_length": 778.9769439697266, |
|
"epoch": 0.3694891532540238, |
|
"grad_norm": 17.09342384338379, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1324404794140719, |
|
"reward_std": 0.1296369112096727, |
|
"rewards/accuracy_reward": 0.1324404794140719, |
|
"step": 66 |
|
}, |
|
{ |
|
"completion_length": 791.2901954650879, |
|
"epoch": 0.37508747375787266, |
|
"grad_norm": 5.410295009613037, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.10342262109043077, |
|
"reward_std": 0.10896145971491933, |
|
"rewards/accuracy_reward": 0.10342262109043077, |
|
"step": 67 |
|
}, |
|
{ |
|
"completion_length": 745.5297737121582, |
|
"epoch": 0.3806857942617215, |
|
"grad_norm": 13.46410846710205, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21428571629803628, |
|
"reward_std": 0.14342379802837968, |
|
"rewards/accuracy_reward": 0.21428571629803628, |
|
"step": 68 |
|
}, |
|
{ |
|
"completion_length": 776.0439109802246, |
|
"epoch": 0.38628411476557034, |
|
"grad_norm": 8.488734245300293, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.11830357467988506, |
|
"reward_std": 0.11542791035026312, |
|
"rewards/accuracy_reward": 0.11830357467988506, |
|
"step": 69 |
|
}, |
|
{ |
|
"completion_length": 797.398078918457, |
|
"epoch": 0.3918824352694192, |
|
"grad_norm": 8.824621200561523, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.11383928917348385, |
|
"reward_std": 0.09247074788436294, |
|
"rewards/accuracy_reward": 0.11383928917348385, |
|
"step": 70 |
|
}, |
|
{ |
|
"completion_length": 765.494800567627, |
|
"epoch": 0.397480755773268, |
|
"grad_norm": 7.611891746520996, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15178571932483464, |
|
"reward_std": 0.10585443489253521, |
|
"rewards/accuracy_reward": 0.15178571932483464, |
|
"step": 71 |
|
}, |
|
{ |
|
"completion_length": 778.1890029907227, |
|
"epoch": 0.40307907627711687, |
|
"grad_norm": 14.848072052001953, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.18452381214592606, |
|
"reward_std": 0.16349461674690247, |
|
"rewards/accuracy_reward": 0.18452381214592606, |
|
"step": 72 |
|
}, |
|
{ |
|
"completion_length": 774.4456939697266, |
|
"epoch": 0.4086773967809657, |
|
"grad_norm": 5.173851490020752, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.20014881325187162, |
|
"reward_std": 0.12936217105016112, |
|
"rewards/accuracy_reward": 0.20014881325187162, |
|
"step": 73 |
|
}, |
|
{ |
|
"completion_length": 841.1287307739258, |
|
"epoch": 0.41427571728481455, |
|
"grad_norm": 2.6559793949127197, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.13839285890571773, |
|
"reward_std": 0.13339613983407617, |
|
"rewards/accuracy_reward": 0.13839285890571773, |
|
"step": 74 |
|
}, |
|
{ |
|
"completion_length": 818.7284393310547, |
|
"epoch": 0.4198740377886634, |
|
"grad_norm": 1.2601547241210938, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15699405129998922, |
|
"reward_std": 0.147474380210042, |
|
"rewards/accuracy_reward": 0.15699405129998922, |
|
"step": 75 |
|
}, |
|
{ |
|
"completion_length": 806.4762115478516, |
|
"epoch": 0.42547235829251223, |
|
"grad_norm": 0.6955339312553406, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.14434524072567, |
|
"reward_std": 0.11006892891600728, |
|
"rewards/accuracy_reward": 0.14434524072567, |
|
"step": 76 |
|
}, |
|
{ |
|
"completion_length": 829.7209968566895, |
|
"epoch": 0.4310706787963611, |
|
"grad_norm": 0.3638227880001068, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16294643277069554, |
|
"reward_std": 0.12348859198391438, |
|
"rewards/accuracy_reward": 0.16294643277069554, |
|
"step": 77 |
|
}, |
|
{ |
|
"completion_length": 825.5610313415527, |
|
"epoch": 0.4366689993002099, |
|
"grad_norm": 0.26862311363220215, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1272321459837258, |
|
"reward_std": 0.10739197209477425, |
|
"rewards/accuracy_reward": 0.1272321459837258, |
|
"step": 78 |
|
}, |
|
{ |
|
"completion_length": 789.9360198974609, |
|
"epoch": 0.44226731980405876, |
|
"grad_norm": 0.2580260634422302, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16964286100119352, |
|
"reward_std": 0.12814528262242675, |
|
"rewards/accuracy_reward": 0.16964286100119352, |
|
"step": 79 |
|
}, |
|
{ |
|
"completion_length": 812.0885543823242, |
|
"epoch": 0.44786564030790765, |
|
"grad_norm": 0.21123050153255463, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15252976340707392, |
|
"reward_std": 0.11390745500102639, |
|
"rewards/accuracy_reward": 0.15252976340707392, |
|
"step": 80 |
|
}, |
|
{ |
|
"completion_length": 827.6890068054199, |
|
"epoch": 0.4534639608117565, |
|
"grad_norm": 0.21955974400043488, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.13392857264261693, |
|
"reward_std": 0.11876308592036366, |
|
"rewards/accuracy_reward": 0.13392857264261693, |
|
"step": 81 |
|
}, |
|
{ |
|
"completion_length": 790.8415336608887, |
|
"epoch": 0.45906228131560534, |
|
"grad_norm": 0.15459412336349487, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21205357508733869, |
|
"reward_std": 0.1082809790968895, |
|
"rewards/accuracy_reward": 0.21205357508733869, |
|
"step": 82 |
|
}, |
|
{ |
|
"completion_length": 799.1346817016602, |
|
"epoch": 0.4646606018194542, |
|
"grad_norm": 0.23524288833141327, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1569940506014973, |
|
"reward_std": 0.11971638561226428, |
|
"rewards/accuracy_reward": 0.1569940506014973, |
|
"step": 83 |
|
}, |
|
{ |
|
"completion_length": 757.0647468566895, |
|
"epoch": 0.470258922323303, |
|
"grad_norm": 0.19839145243167877, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2038690508925356, |
|
"reward_std": 0.13258925126865506, |
|
"rewards/accuracy_reward": 0.2038690508925356, |
|
"step": 84 |
|
}, |
|
{ |
|
"completion_length": 808.5535926818848, |
|
"epoch": 0.47585724282715186, |
|
"grad_norm": 0.32531610131263733, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1666666684905067, |
|
"reward_std": 0.13159588165581226, |
|
"rewards/accuracy_reward": 0.1666666684905067, |
|
"step": 85 |
|
}, |
|
{ |
|
"completion_length": 813.6451110839844, |
|
"epoch": 0.4814555633310007, |
|
"grad_norm": 0.18594929575920105, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.144345240900293, |
|
"reward_std": 0.1254395372234285, |
|
"rewards/accuracy_reward": 0.144345240900293, |
|
"step": 86 |
|
}, |
|
{ |
|
"completion_length": 774.9620704650879, |
|
"epoch": 0.48705388383484954, |
|
"grad_norm": 0.18389521539211273, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19270833674818277, |
|
"reward_std": 0.15590714011341333, |
|
"rewards/accuracy_reward": 0.19270833674818277, |
|
"step": 87 |
|
}, |
|
{ |
|
"completion_length": 769.0573043823242, |
|
"epoch": 0.4926522043386984, |
|
"grad_norm": 0.2218749225139618, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2105654807528481, |
|
"reward_std": 0.12664200831204653, |
|
"rewards/accuracy_reward": 0.2105654807528481, |
|
"step": 88 |
|
}, |
|
{ |
|
"completion_length": 792.6644401550293, |
|
"epoch": 0.4982505248425472, |
|
"grad_norm": 1.4022585153579712, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15476190886693075, |
|
"reward_std": 0.11095877178013325, |
|
"rewards/accuracy_reward": 0.15476190886693075, |
|
"step": 89 |
|
}, |
|
{ |
|
"completion_length": 782.799861907959, |
|
"epoch": 0.5038488453463961, |
|
"grad_norm": 28.66908073425293, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15104167041135952, |
|
"reward_std": 0.15992113715037704, |
|
"rewards/accuracy_reward": 0.15104167041135952, |
|
"step": 90 |
|
}, |
|
{ |
|
"completion_length": 795.1547698974609, |
|
"epoch": 0.509447165850245, |
|
"grad_norm": 8.09515380859375, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1569940495537594, |
|
"reward_std": 0.12339623901061714, |
|
"rewards/accuracy_reward": 0.1569940495537594, |
|
"step": 91 |
|
}, |
|
{ |
|
"completion_length": 823.6614723205566, |
|
"epoch": 0.5150454863540938, |
|
"grad_norm": 4.843145847320557, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1197916705859825, |
|
"reward_std": 0.11639799829572439, |
|
"rewards/accuracy_reward": 0.1197916705859825, |
|
"step": 92 |
|
}, |
|
{ |
|
"completion_length": 814.1956977844238, |
|
"epoch": 0.5206438068579426, |
|
"grad_norm": 3.3891513347625732, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.12276785861467943, |
|
"reward_std": 0.11077928310260177, |
|
"rewards/accuracy_reward": 0.12276785861467943, |
|
"step": 93 |
|
}, |
|
{ |
|
"completion_length": 832.5439147949219, |
|
"epoch": 0.5262421273617914, |
|
"grad_norm": 3.9651668071746826, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.12797619338380173, |
|
"reward_std": 0.13058934407308698, |
|
"rewards/accuracy_reward": 0.12797619338380173, |
|
"step": 94 |
|
}, |
|
{ |
|
"completion_length": 845.0171318054199, |
|
"epoch": 0.5318404478656403, |
|
"grad_norm": 2.0770602226257324, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.11235119315097108, |
|
"reward_std": 0.08664647908881307, |
|
"rewards/accuracy_reward": 0.11235119315097108, |
|
"step": 95 |
|
}, |
|
{ |
|
"completion_length": 790.6480865478516, |
|
"epoch": 0.5374387683694891, |
|
"grad_norm": 1.959823727607727, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.12351190787740052, |
|
"reward_std": 0.10759956203401089, |
|
"rewards/accuracy_reward": 0.12351190787740052, |
|
"step": 96 |
|
}, |
|
{ |
|
"completion_length": 816.3586463928223, |
|
"epoch": 0.543037088873338, |
|
"grad_norm": 1.8126938343048096, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1688988117966801, |
|
"reward_std": 0.13670154195278883, |
|
"rewards/accuracy_reward": 0.1688988117966801, |
|
"step": 97 |
|
}, |
|
{ |
|
"completion_length": 787.2901878356934, |
|
"epoch": 0.5486354093771868, |
|
"grad_norm": 1.0017977952957153, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15104166907258332, |
|
"reward_std": 0.12999275140464306, |
|
"rewards/accuracy_reward": 0.15104166907258332, |
|
"step": 98 |
|
}, |
|
{ |
|
"completion_length": 755.874267578125, |
|
"epoch": 0.5542337298810357, |
|
"grad_norm": 1.0542266368865967, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.24479167215758935, |
|
"reward_std": 0.15221792878583074, |
|
"rewards/accuracy_reward": 0.24479167215758935, |
|
"step": 99 |
|
}, |
|
{ |
|
"completion_length": 791.4122161865234, |
|
"epoch": 0.5598320503848845, |
|
"grad_norm": 0.7178149223327637, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17038690840126947, |
|
"reward_std": 0.13499723048880696, |
|
"rewards/accuracy_reward": 0.17038690840126947, |
|
"step": 100 |
|
}, |
|
{ |
|
"completion_length": 744.4151840209961, |
|
"epoch": 0.5654303708887334, |
|
"grad_norm": 0.7150797843933105, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.20610119280172512, |
|
"reward_std": 0.1412361622788012, |
|
"rewards/accuracy_reward": 0.20610119280172512, |
|
"step": 101 |
|
}, |
|
{ |
|
"completion_length": 789.238109588623, |
|
"epoch": 0.5710286913925823, |
|
"grad_norm": 0.9325894117355347, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16443452634848654, |
|
"reward_std": 0.11632257583551109, |
|
"rewards/accuracy_reward": 0.16443452634848654, |
|
"step": 102 |
|
}, |
|
{ |
|
"completion_length": 768.8869132995605, |
|
"epoch": 0.5766270118964311, |
|
"grad_norm": 7.234724998474121, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.125744050310459, |
|
"reward_std": 0.10205298336222768, |
|
"rewards/accuracy_reward": 0.125744050310459, |
|
"step": 103 |
|
}, |
|
{ |
|
"completion_length": 751.9308242797852, |
|
"epoch": 0.58222533240028, |
|
"grad_norm": 3.879939079284668, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1755952414823696, |
|
"reward_std": 0.12294519320130348, |
|
"rewards/accuracy_reward": 0.1755952414823696, |
|
"step": 104 |
|
}, |
|
{ |
|
"completion_length": 627.394359588623, |
|
"epoch": 0.5878236529041287, |
|
"grad_norm": 3.7597320079803467, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2224702457897365, |
|
"reward_std": 0.15529246046207845, |
|
"rewards/accuracy_reward": 0.2224702457897365, |
|
"step": 105 |
|
}, |
|
{ |
|
"completion_length": 617.4479293823242, |
|
"epoch": 0.5934219734079776, |
|
"grad_norm": 1.7537007331848145, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2157738134264946, |
|
"reward_std": 0.14103552885353565, |
|
"rewards/accuracy_reward": 0.2157738134264946, |
|
"step": 106 |
|
}, |
|
{ |
|
"completion_length": 654.3489608764648, |
|
"epoch": 0.5990202939118264, |
|
"grad_norm": 1.367697834968567, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17485119379125535, |
|
"reward_std": 0.14419334661215544, |
|
"rewards/accuracy_reward": 0.17485119379125535, |
|
"step": 107 |
|
}, |
|
{ |
|
"completion_length": 674.4724807739258, |
|
"epoch": 0.6046186144156753, |
|
"grad_norm": 0.4473717212677002, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17857143364381045, |
|
"reward_std": 0.11048215441405773, |
|
"rewards/accuracy_reward": 0.17857143364381045, |
|
"step": 108 |
|
}, |
|
{ |
|
"completion_length": 725.7738227844238, |
|
"epoch": 0.6102169349195241, |
|
"grad_norm": 3.6793627738952637, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.14806547824991867, |
|
"reward_std": 0.08863410213962197, |
|
"rewards/accuracy_reward": 0.14806547824991867, |
|
"step": 109 |
|
}, |
|
{ |
|
"completion_length": 677.5327529907227, |
|
"epoch": 0.615815255423373, |
|
"grad_norm": 1.7502440214157104, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.24627976631745696, |
|
"reward_std": 0.16675466718152165, |
|
"rewards/accuracy_reward": 0.24627976631745696, |
|
"step": 110 |
|
}, |
|
{ |
|
"completion_length": 738.4843883514404, |
|
"epoch": 0.6214135759272218, |
|
"grad_norm": 5.396337509155273, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17485119419870898, |
|
"reward_std": 0.1276370887644589, |
|
"rewards/accuracy_reward": 0.17485119419870898, |
|
"step": 111 |
|
}, |
|
{ |
|
"completion_length": 669.8638534545898, |
|
"epoch": 0.6270118964310707, |
|
"grad_norm": 13.67143726348877, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21726190828485414, |
|
"reward_std": 0.13963406695984304, |
|
"rewards/accuracy_reward": 0.21726190828485414, |
|
"step": 112 |
|
}, |
|
{ |
|
"completion_length": 665.2529907226562, |
|
"epoch": 0.6326102169349195, |
|
"grad_norm": 44.409034729003906, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19940476591000333, |
|
"reward_std": 0.14031004393473268, |
|
"rewards/accuracy_reward": 0.19940476591000333, |
|
"step": 113 |
|
}, |
|
{ |
|
"completion_length": 632.8326091766357, |
|
"epoch": 0.6382085374387684, |
|
"grad_norm": 10.954506874084473, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2090773865347728, |
|
"reward_std": 0.13506855070590973, |
|
"rewards/accuracy_reward": 0.2090773865347728, |
|
"step": 114 |
|
}, |
|
{ |
|
"completion_length": 684.7834930419922, |
|
"epoch": 0.6438068579426172, |
|
"grad_norm": 3.232715606689453, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.18601190974004567, |
|
"reward_std": 0.14560958743095398, |
|
"rewards/accuracy_reward": 0.18601190974004567, |
|
"step": 115 |
|
}, |
|
{ |
|
"completion_length": 707.04465675354, |
|
"epoch": 0.6494051784464661, |
|
"grad_norm": 0.662769079208374, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21875000512227416, |
|
"reward_std": 0.1376757239922881, |
|
"rewards/accuracy_reward": 0.21875000512227416, |
|
"step": 116 |
|
}, |
|
{ |
|
"completion_length": 669.0446586608887, |
|
"epoch": 0.655003498950315, |
|
"grad_norm": 0.33869776129722595, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21056548063643277, |
|
"reward_std": 0.14122196286916733, |
|
"rewards/accuracy_reward": 0.21056548063643277, |
|
"step": 117 |
|
}, |
|
{ |
|
"completion_length": 684.3393020629883, |
|
"epoch": 0.6606018194541637, |
|
"grad_norm": 0.7248935103416443, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1614583363989368, |
|
"reward_std": 0.11945095984265208, |
|
"rewards/accuracy_reward": 0.1614583363989368, |
|
"step": 118 |
|
}, |
|
{ |
|
"completion_length": 666.3139991760254, |
|
"epoch": 0.6662001399580126, |
|
"grad_norm": 43.89714431762695, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.22470238665118814, |
|
"reward_std": 0.10662061581388116, |
|
"rewards/accuracy_reward": 0.22470238665118814, |
|
"step": 119 |
|
}, |
|
{ |
|
"completion_length": 679.056568145752, |
|
"epoch": 0.6717984604618614, |
|
"grad_norm": 8.356165885925293, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21279762336052954, |
|
"reward_std": 0.124493746785447, |
|
"rewards/accuracy_reward": 0.21279762336052954, |
|
"step": 120 |
|
}, |
|
{ |
|
"completion_length": 687.572925567627, |
|
"epoch": 0.6773967809657103, |
|
"grad_norm": 7.832132816314697, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.18750000436557457, |
|
"reward_std": 0.14207694586366415, |
|
"rewards/accuracy_reward": 0.18750000436557457, |
|
"step": 121 |
|
}, |
|
{ |
|
"completion_length": 689.9590873718262, |
|
"epoch": 0.6829951014695591, |
|
"grad_norm": 2.498194932937622, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15773809858364984, |
|
"reward_std": 0.10648334585130215, |
|
"rewards/accuracy_reward": 0.15773809858364984, |
|
"step": 122 |
|
}, |
|
{ |
|
"completion_length": 704.2939109802246, |
|
"epoch": 0.688593421973408, |
|
"grad_norm": 6.899082183837891, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.18377976468764246, |
|
"reward_std": 0.1484173396602273, |
|
"rewards/accuracy_reward": 0.18377976468764246, |
|
"step": 123 |
|
}, |
|
{ |
|
"completion_length": 651.5878105163574, |
|
"epoch": 0.6941917424772568, |
|
"grad_norm": 7.5064544677734375, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2500000016298145, |
|
"reward_std": 0.16986942291259766, |
|
"rewards/accuracy_reward": 0.2500000016298145, |
|
"step": 124 |
|
}, |
|
{ |
|
"completion_length": 695.3638496398926, |
|
"epoch": 0.6997900629811057, |
|
"grad_norm": 4.8156867027282715, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.20238095673266798, |
|
"reward_std": 0.13348530931398273, |
|
"rewards/accuracy_reward": 0.20238095673266798, |
|
"step": 125 |
|
}, |
|
{ |
|
"completion_length": 702.0327548980713, |
|
"epoch": 0.7053883834849545, |
|
"grad_norm": 7.651693344116211, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21875000500585884, |
|
"reward_std": 0.16874144971370697, |
|
"rewards/accuracy_reward": 0.21875000500585884, |
|
"step": 126 |
|
}, |
|
{ |
|
"completion_length": 671.8259048461914, |
|
"epoch": 0.7109867039888034, |
|
"grad_norm": 6.942368507385254, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.24330357555299997, |
|
"reward_std": 0.1387807228602469, |
|
"rewards/accuracy_reward": 0.24330357555299997, |
|
"step": 127 |
|
}, |
|
{ |
|
"completion_length": 694.0915260314941, |
|
"epoch": 0.7165850244926522, |
|
"grad_norm": 4.375088691711426, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.20833333634072915, |
|
"reward_std": 0.12121187802404165, |
|
"rewards/accuracy_reward": 0.20833333634072915, |
|
"step": 128 |
|
}, |
|
{ |
|
"completion_length": 703.304328918457, |
|
"epoch": 0.722183344996501, |
|
"grad_norm": 3.8942065238952637, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1785714317811653, |
|
"reward_std": 0.1311384318396449, |
|
"rewards/accuracy_reward": 0.1785714317811653, |
|
"step": 129 |
|
}, |
|
{ |
|
"completion_length": 681.0907897949219, |
|
"epoch": 0.72778166550035, |
|
"grad_norm": 2.7340543270111084, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1979166711680591, |
|
"reward_std": 0.12608638824895024, |
|
"rewards/accuracy_reward": 0.1979166711680591, |
|
"step": 130 |
|
}, |
|
{ |
|
"completion_length": 695.2366180419922, |
|
"epoch": 0.7333799860041987, |
|
"grad_norm": 1.5587713718414307, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15922619513003156, |
|
"reward_std": 0.11638409737497568, |
|
"rewards/accuracy_reward": 0.15922619513003156, |
|
"step": 131 |
|
}, |
|
{ |
|
"completion_length": 709.5788822174072, |
|
"epoch": 0.7389783065080476, |
|
"grad_norm": 3.3193564414978027, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.159226194024086, |
|
"reward_std": 0.13807840831577778, |
|
"rewards/accuracy_reward": 0.159226194024086, |
|
"step": 132 |
|
}, |
|
{ |
|
"completion_length": 698.4427185058594, |
|
"epoch": 0.7445766270118964, |
|
"grad_norm": 1.9349188804626465, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19717262242920697, |
|
"reward_std": 0.12813474284484982, |
|
"rewards/accuracy_reward": 0.19717262242920697, |
|
"step": 133 |
|
}, |
|
{ |
|
"completion_length": 688.2210006713867, |
|
"epoch": 0.7501749475157453, |
|
"grad_norm": 2.0050582885742188, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.20758929062867537, |
|
"reward_std": 0.1788321421481669, |
|
"rewards/accuracy_reward": 0.20758929062867537, |
|
"step": 134 |
|
}, |
|
{ |
|
"completion_length": 696.2515087127686, |
|
"epoch": 0.7557732680195941, |
|
"grad_norm": 0.6444657444953918, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19642857677536085, |
|
"reward_std": 0.12122054304927588, |
|
"rewards/accuracy_reward": 0.19642857677536085, |
|
"step": 135 |
|
}, |
|
{ |
|
"completion_length": 694.3489761352539, |
|
"epoch": 0.761371588523443, |
|
"grad_norm": 0.5755271315574646, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2113095311797224, |
|
"reward_std": 0.15177863789722323, |
|
"rewards/accuracy_reward": 0.2113095311797224, |
|
"step": 136 |
|
}, |
|
{ |
|
"completion_length": 703.0320091247559, |
|
"epoch": 0.7669699090272918, |
|
"grad_norm": 0.382269024848938, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19419643247965723, |
|
"reward_std": 0.11693665431812406, |
|
"rewards/accuracy_reward": 0.19419643247965723, |
|
"step": 137 |
|
}, |
|
{ |
|
"completion_length": 705.675615310669, |
|
"epoch": 0.7725682295311407, |
|
"grad_norm": 0.23526860773563385, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17187500471482053, |
|
"reward_std": 0.12707782164216042, |
|
"rewards/accuracy_reward": 0.17187500471482053, |
|
"step": 138 |
|
}, |
|
{ |
|
"completion_length": 673.7269477844238, |
|
"epoch": 0.7781665500349895, |
|
"grad_norm": 0.22726382315158844, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2581845277454704, |
|
"reward_std": 0.1626861086115241, |
|
"rewards/accuracy_reward": 0.2581845277454704, |
|
"step": 139 |
|
}, |
|
{ |
|
"completion_length": 701.4620704650879, |
|
"epoch": 0.7837648705388384, |
|
"grad_norm": 0.19594760239124298, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.21726190811023116, |
|
"reward_std": 0.15826332941651344, |
|
"rewards/accuracy_reward": 0.21726190811023116, |
|
"step": 140 |
|
}, |
|
{ |
|
"completion_length": 726.7105808258057, |
|
"epoch": 0.7893631910426872, |
|
"grad_norm": 0.21776525676250458, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19122024084208533, |
|
"reward_std": 0.13169277971610427, |
|
"rewards/accuracy_reward": 0.19122024084208533, |
|
"step": 141 |
|
}, |
|
{ |
|
"completion_length": 709.7366180419922, |
|
"epoch": 0.794961511546536, |
|
"grad_norm": 0.17518886923789978, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19642857578583062, |
|
"reward_std": 0.13794927392154932, |
|
"rewards/accuracy_reward": 0.19642857578583062, |
|
"step": 142 |
|
}, |
|
{ |
|
"completion_length": 700.7433128356934, |
|
"epoch": 0.8005598320503848, |
|
"grad_norm": 0.1962345391511917, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2261904808692634, |
|
"reward_std": 0.17080192361027002, |
|
"rewards/accuracy_reward": 0.2261904808692634, |
|
"step": 143 |
|
}, |
|
{ |
|
"completion_length": 749.479175567627, |
|
"epoch": 0.8061581525542337, |
|
"grad_norm": 0.09322026371955872, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.14434524165699258, |
|
"reward_std": 0.07725285878404975, |
|
"rewards/accuracy_reward": 0.14434524165699258, |
|
"step": 144 |
|
}, |
|
{ |
|
"completion_length": 726.3467464447021, |
|
"epoch": 0.8117564730580826, |
|
"grad_norm": 0.1386028379201889, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2031250053551048, |
|
"reward_std": 0.14093447849154472, |
|
"rewards/accuracy_reward": 0.2031250053551048, |
|
"step": 145 |
|
}, |
|
{ |
|
"completion_length": 738.7120704650879, |
|
"epoch": 0.8173547935619314, |
|
"grad_norm": 0.12930598855018616, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17857143195578828, |
|
"reward_std": 0.12077544536441565, |
|
"rewards/accuracy_reward": 0.17857143195578828, |
|
"step": 146 |
|
}, |
|
{ |
|
"completion_length": 765.6473388671875, |
|
"epoch": 0.8229531140657803, |
|
"grad_norm": 0.13917317986488342, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.12425595498643816, |
|
"reward_std": 0.12115136627107859, |
|
"rewards/accuracy_reward": 0.12425595498643816, |
|
"step": 147 |
|
}, |
|
{ |
|
"completion_length": 741.0476341247559, |
|
"epoch": 0.8285514345696291, |
|
"grad_norm": 0.5330355763435364, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.18080357613507658, |
|
"reward_std": 0.12820188701152802, |
|
"rewards/accuracy_reward": 0.18080357613507658, |
|
"step": 148 |
|
}, |
|
{ |
|
"completion_length": 715.349723815918, |
|
"epoch": 0.834149755073478, |
|
"grad_norm": 0.1551419347524643, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.18452381377574056, |
|
"reward_std": 0.13737005554139614, |
|
"rewards/accuracy_reward": 0.18452381377574056, |
|
"step": 149 |
|
}, |
|
{ |
|
"completion_length": 748.0156364440918, |
|
"epoch": 0.8397480755773268, |
|
"grad_norm": 0.17706456780433655, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19047619315097108, |
|
"reward_std": 0.10885073570534587, |
|
"rewards/accuracy_reward": 0.19047619315097108, |
|
"step": 150 |
|
}, |
|
{ |
|
"completion_length": 772.0535888671875, |
|
"epoch": 0.8453463960811757, |
|
"grad_norm": 0.3694287836551666, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17336309852544218, |
|
"reward_std": 0.11179409665055573, |
|
"rewards/accuracy_reward": 0.17336309852544218, |
|
"step": 151 |
|
}, |
|
{ |
|
"completion_length": 774.1912307739258, |
|
"epoch": 0.8509447165850245, |
|
"grad_norm": 0.2840059697628021, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15252976468764246, |
|
"reward_std": 0.11831123428419232, |
|
"rewards/accuracy_reward": 0.15252976468764246, |
|
"step": 152 |
|
}, |
|
{ |
|
"completion_length": 760.9918327331543, |
|
"epoch": 0.8565430370888734, |
|
"grad_norm": 0.2995321452617645, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1778273859526962, |
|
"reward_std": 0.15920937061309814, |
|
"rewards/accuracy_reward": 0.1778273859526962, |
|
"step": 153 |
|
}, |
|
{ |
|
"completion_length": 763.8556671142578, |
|
"epoch": 0.8621413575927221, |
|
"grad_norm": 1.4188278913497925, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1696428614668548, |
|
"reward_std": 0.12167660798877478, |
|
"rewards/accuracy_reward": 0.1696428614668548, |
|
"step": 154 |
|
}, |
|
{ |
|
"completion_length": 771.9032936096191, |
|
"epoch": 0.867739678096571, |
|
"grad_norm": 0.15566886961460114, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.20684524130774662, |
|
"reward_std": 0.13866402814164758, |
|
"rewards/accuracy_reward": 0.20684524130774662, |
|
"step": 155 |
|
}, |
|
{ |
|
"completion_length": 797.9323081970215, |
|
"epoch": 0.8733379986004198, |
|
"grad_norm": 0.21956497430801392, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.11979166796663776, |
|
"reward_std": 0.08592891087755561, |
|
"rewards/accuracy_reward": 0.11979166796663776, |
|
"step": 156 |
|
}, |
|
{ |
|
"completion_length": 803.9062576293945, |
|
"epoch": 0.8789363191042687, |
|
"grad_norm": 0.4193490743637085, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.14955357427243143, |
|
"reward_std": 0.13559656590223312, |
|
"rewards/accuracy_reward": 0.14955357427243143, |
|
"step": 157 |
|
}, |
|
{ |
|
"completion_length": 734.5416793823242, |
|
"epoch": 0.8845346396081175, |
|
"grad_norm": 0.32442227005958557, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.20312500384170562, |
|
"reward_std": 0.13238779548555613, |
|
"rewards/accuracy_reward": 0.20312500384170562, |
|
"step": 158 |
|
}, |
|
{ |
|
"completion_length": 780.0461502075195, |
|
"epoch": 0.8901329601119664, |
|
"grad_norm": 0.2237851321697235, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.15773809980601072, |
|
"reward_std": 0.11771008232608438, |
|
"rewards/accuracy_reward": 0.15773809980601072, |
|
"step": 159 |
|
}, |
|
{ |
|
"completion_length": 725.6488285064697, |
|
"epoch": 0.8957312806158153, |
|
"grad_norm": 0.20033074915409088, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19122024055104703, |
|
"reward_std": 0.13583081704564393, |
|
"rewards/accuracy_reward": 0.19122024055104703, |
|
"step": 160 |
|
}, |
|
{ |
|
"completion_length": 790.0461349487305, |
|
"epoch": 0.9013296011196641, |
|
"grad_norm": 0.2317737340927124, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.19345238577807322, |
|
"reward_std": 0.17005419172346592, |
|
"rewards/accuracy_reward": 0.19345238577807322, |
|
"step": 161 |
|
}, |
|
{ |
|
"completion_length": 770.9166851043701, |
|
"epoch": 0.906927921623513, |
|
"grad_norm": 0.19784215092658997, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16592262306949124, |
|
"reward_std": 0.12148957094177604, |
|
"rewards/accuracy_reward": 0.16592262306949124, |
|
"step": 162 |
|
}, |
|
{ |
|
"completion_length": 801.5565567016602, |
|
"epoch": 0.9125262421273618, |
|
"grad_norm": 0.16940854489803314, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.14806547743501142, |
|
"reward_std": 0.14629250299185514, |
|
"rewards/accuracy_reward": 0.14806547743501142, |
|
"step": 163 |
|
}, |
|
{ |
|
"completion_length": 758.389892578125, |
|
"epoch": 0.9181245626312107, |
|
"grad_norm": 0.251857727766037, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1979166735545732, |
|
"reward_std": 0.13391391886398196, |
|
"rewards/accuracy_reward": 0.1979166735545732, |
|
"step": 164 |
|
}, |
|
{ |
|
"completion_length": 778.1622200012207, |
|
"epoch": 0.9237228831350595, |
|
"grad_norm": 0.2375626116991043, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17038691049674526, |
|
"reward_std": 0.12663642317056656, |
|
"rewards/accuracy_reward": 0.17038691049674526, |
|
"step": 165 |
|
}, |
|
{ |
|
"completion_length": 788.4985237121582, |
|
"epoch": 0.9293212036389084, |
|
"grad_norm": 0.2811802327632904, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1428571462747641, |
|
"reward_std": 0.09295102627947927, |
|
"rewards/accuracy_reward": 0.1428571462747641, |
|
"step": 166 |
|
}, |
|
{ |
|
"completion_length": 706.411470413208, |
|
"epoch": 0.9349195241427571, |
|
"grad_norm": 0.75251305103302, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.2306547665502876, |
|
"reward_std": 0.14719042740762234, |
|
"rewards/accuracy_reward": 0.2306547665502876, |
|
"step": 167 |
|
}, |
|
{ |
|
"completion_length": 744.7879638671875, |
|
"epoch": 0.940517844646606, |
|
"grad_norm": 0.21151037514209747, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.16964286006987095, |
|
"reward_std": 0.13533117901533842, |
|
"rewards/accuracy_reward": 0.16964286006987095, |
|
"step": 168 |
|
}, |
|
{ |
|
"completion_length": 734.4300689697266, |
|
"epoch": 0.9461161651504548, |
|
"grad_norm": 0.24678246676921844, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17336309817619622, |
|
"reward_std": 0.14309520740061998, |
|
"rewards/accuracy_reward": 0.17336309817619622, |
|
"step": 169 |
|
}, |
|
{ |
|
"completion_length": 724.0357284545898, |
|
"epoch": 0.9517144856543037, |
|
"grad_norm": 0.36960089206695557, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.22023809846723452, |
|
"reward_std": 0.16868228651583195, |
|
"rewards/accuracy_reward": 0.22023809846723452, |
|
"step": 170 |
|
}, |
|
{ |
|
"completion_length": 681.9323043823242, |
|
"epoch": 0.9573128061581525, |
|
"grad_norm": 0.3980717062950134, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.25297619425691664, |
|
"reward_std": 0.1777943717315793, |
|
"rewards/accuracy_reward": 0.25297619425691664, |
|
"step": 171 |
|
}, |
|
{ |
|
"completion_length": 706.1183166503906, |
|
"epoch": 0.9629111266620014, |
|
"grad_norm": 0.1279979944229126, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.18601190811023116, |
|
"reward_std": 0.12339989701285958, |
|
"rewards/accuracy_reward": 0.18601190811023116, |
|
"step": 172 |
|
}, |
|
{ |
|
"completion_length": 696.6547718048096, |
|
"epoch": 0.9685094471658502, |
|
"grad_norm": 0.10279275476932526, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.17187500558793545, |
|
"reward_std": 0.1345887309871614, |
|
"rewards/accuracy_reward": 0.17187500558793545, |
|
"step": 173 |
|
}, |
|
{ |
|
"completion_length": 659.0766487121582, |
|
"epoch": 0.9741077676696991, |
|
"grad_norm": 0.14932937920093536, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.23511904943734407, |
|
"reward_std": 0.12750320974737406, |
|
"rewards/accuracy_reward": 0.23511904943734407, |
|
"step": 174 |
|
}, |
|
{ |
|
"completion_length": 710.5974884033203, |
|
"epoch": 0.979706088173548, |
|
"grad_norm": 0.15650224685668945, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.20089286181610078, |
|
"reward_std": 0.1395609532482922, |
|
"rewards/accuracy_reward": 0.20089286181610078, |
|
"step": 175 |
|
}, |
|
{ |
|
"completion_length": 752.3147430419922, |
|
"epoch": 0.9853044086773968, |
|
"grad_norm": 0.14169637858867645, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1659226217889227, |
|
"reward_std": 0.14061464229598641, |
|
"rewards/accuracy_reward": 0.1659226217889227, |
|
"step": 176 |
|
}, |
|
{ |
|
"completion_length": 718.4464435577393, |
|
"epoch": 0.9909027291812457, |
|
"grad_norm": 0.1935679018497467, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.20907738618552685, |
|
"reward_std": 0.13396611297503114, |
|
"rewards/accuracy_reward": 0.20907738618552685, |
|
"step": 177 |
|
}, |
|
{ |
|
"completion_length": 753.9546356201172, |
|
"epoch": 0.9965010496850945, |
|
"grad_norm": 0.23016896843910217, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1510416681994684, |
|
"reward_std": 0.11266338312998414, |
|
"rewards/accuracy_reward": 0.1510416681994684, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.9965010496850945, |
|
"step": 178, |
|
"total_flos": 0.0, |
|
"train_loss": 1.002405131310157e-08, |
|
"train_runtime": 79864.5906, |
|
"train_samples_per_second": 0.25, |
|
"train_steps_per_second": 0.002 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 178, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|