|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9965010496850945, |
|
"eval_steps": 100, |
|
"global_step": 178, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 548.2455501556396, |
|
"epoch": 0.005598320503848845, |
|
"grad_norm": 0.003676735097542405, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03125000128056854, |
|
"reward_std": 0.035294653847813606, |
|
"rewards/accuracy_reward": 0.03125000128056854, |
|
"step": 1 |
|
}, |
|
{ |
|
"completion_length": 521.3355770111084, |
|
"epoch": 0.01119664100769769, |
|
"grad_norm": 0.001673316117376089, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.005952381179668009, |
|
"reward_std": 0.008138235658407211, |
|
"rewards/accuracy_reward": 0.005952381179668009, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 565.3147449493408, |
|
"epoch": 0.016794961511546535, |
|
"grad_norm": 0.002517723012715578, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.009672619285993278, |
|
"reward_std": 0.01636804547160864, |
|
"rewards/accuracy_reward": 0.009672619285993278, |
|
"step": 3 |
|
}, |
|
{ |
|
"completion_length": 577.5029792785645, |
|
"epoch": 0.02239328201539538, |
|
"grad_norm": 0.003224034095183015, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.029761905316263437, |
|
"reward_std": 0.02953372267074883, |
|
"rewards/accuracy_reward": 0.029761905316263437, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 564.710578918457, |
|
"epoch": 0.02799160251924423, |
|
"grad_norm": 0.0038144055288285017, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.020833333779592067, |
|
"reward_std": 0.022426264360547066, |
|
"rewards/accuracy_reward": 0.020833333779592067, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 534.8861694335938, |
|
"epoch": 0.03358992302309307, |
|
"grad_norm": 0.003479737089946866, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.026785714842844754, |
|
"reward_std": 0.031758472323417664, |
|
"rewards/accuracy_reward": 0.026785714842844754, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 580.7991256713867, |
|
"epoch": 0.03918824352694192, |
|
"grad_norm": 0.0025944788940250874, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.014880952483508736, |
|
"reward_std": 0.010090996511280537, |
|
"rewards/accuracy_reward": 0.014880952483508736, |
|
"step": 7 |
|
}, |
|
{ |
|
"completion_length": 515.3296279907227, |
|
"epoch": 0.04478656403079076, |
|
"grad_norm": 0.003000692930072546, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.021577381354290992, |
|
"reward_std": 0.019644177984446287, |
|
"rewards/accuracy_reward": 0.021577381354290992, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 571.0290279388428, |
|
"epoch": 0.05038488453463961, |
|
"grad_norm": 0.003332852851599455, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02901785750873387, |
|
"reward_std": 0.026531722396612167, |
|
"rewards/accuracy_reward": 0.02901785750873387, |
|
"step": 9 |
|
}, |
|
{ |
|
"completion_length": 580.1770935058594, |
|
"epoch": 0.05598320503848846, |
|
"grad_norm": 0.0018766584107652307, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.0074404762126505375, |
|
"reward_std": 0.003475441597402096, |
|
"rewards/accuracy_reward": 0.0074404762126505375, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 576.3244209289551, |
|
"epoch": 0.0615815255423373, |
|
"grad_norm": 0.004447213374078274, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03497023892123252, |
|
"reward_std": 0.03141464572399855, |
|
"rewards/accuracy_reward": 0.03497023892123252, |
|
"step": 11 |
|
}, |
|
{ |
|
"completion_length": 555.8616218566895, |
|
"epoch": 0.06717984604618614, |
|
"grad_norm": 0.003440001280978322, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.033482144062872976, |
|
"reward_std": 0.03174867480993271, |
|
"rewards/accuracy_reward": 0.033482144062872976, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 555.1770973205566, |
|
"epoch": 0.072778166550035, |
|
"grad_norm": 0.004410188645124435, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02604166732635349, |
|
"reward_std": 0.03677397267892957, |
|
"rewards/accuracy_reward": 0.02604166732635349, |
|
"step": 13 |
|
}, |
|
{ |
|
"completion_length": 582.8259029388428, |
|
"epoch": 0.07837648705388384, |
|
"grad_norm": 0.0032948441803455353, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.019345238630194217, |
|
"reward_std": 0.02020683465525508, |
|
"rewards/accuracy_reward": 0.019345238630194217, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 573.2150402069092, |
|
"epoch": 0.08397480755773268, |
|
"grad_norm": 0.004153064452111721, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02083333395421505, |
|
"reward_std": 0.024796947836875916, |
|
"rewards/accuracy_reward": 0.02083333395421505, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 511.9151840209961, |
|
"epoch": 0.08957312806158152, |
|
"grad_norm": 0.0052322824485599995, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04017857229337096, |
|
"reward_std": 0.04290085076354444, |
|
"rewards/accuracy_reward": 0.04017857229337096, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 538.8735218048096, |
|
"epoch": 0.09517144856543037, |
|
"grad_norm": 0.005073050037026405, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.0349702388048172, |
|
"reward_std": 0.03157654171809554, |
|
"rewards/accuracy_reward": 0.0349702388048172, |
|
"step": 17 |
|
}, |
|
{ |
|
"completion_length": 594.4494113922119, |
|
"epoch": 0.10076976906927922, |
|
"grad_norm": 0.004708003718405962, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.030505953240208328, |
|
"reward_std": 0.035523281432688236, |
|
"rewards/accuracy_reward": 0.030505953240208328, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 557.1242733001709, |
|
"epoch": 0.10636808957312806, |
|
"grad_norm": 0.0028041391633450985, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02827381028328091, |
|
"reward_std": 0.01928615104407072, |
|
"rewards/accuracy_reward": 0.02827381028328091, |
|
"step": 19 |
|
}, |
|
{ |
|
"completion_length": 560.6994209289551, |
|
"epoch": 0.11196641007697691, |
|
"grad_norm": 0.004033979959785938, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.029761905258055776, |
|
"reward_std": 0.03157439874485135, |
|
"rewards/accuracy_reward": 0.029761905258055776, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 565.3564147949219, |
|
"epoch": 0.11756473058082575, |
|
"grad_norm": 0.0020209557842463255, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.004464285797439516, |
|
"reward_std": 0.00787156680598855, |
|
"rewards/accuracy_reward": 0.004464285797439516, |
|
"step": 21 |
|
}, |
|
{ |
|
"completion_length": 618.4695091247559, |
|
"epoch": 0.1231630510846746, |
|
"grad_norm": 0.002670794492587447, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.021577381703536958, |
|
"reward_std": 0.01789072621613741, |
|
"rewards/accuracy_reward": 0.021577381703536958, |
|
"step": 22 |
|
}, |
|
{ |
|
"completion_length": 584.6480770111084, |
|
"epoch": 0.12876137158852344, |
|
"grad_norm": 0.005253300536423922, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03720238187815994, |
|
"reward_std": 0.03154852241277695, |
|
"rewards/accuracy_reward": 0.03720238187815994, |
|
"step": 23 |
|
}, |
|
{ |
|
"completion_length": 581.5640029907227, |
|
"epoch": 0.13435969209237228, |
|
"grad_norm": 0.0021275475155562162, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.005952381179668009, |
|
"reward_std": 0.008138235658407211, |
|
"rewards/accuracy_reward": 0.005952381179668009, |
|
"step": 24 |
|
}, |
|
{ |
|
"completion_length": 545.2701072692871, |
|
"epoch": 0.13995801259622112, |
|
"grad_norm": 0.002449671970680356, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.015625000465661287, |
|
"reward_std": 0.017629378009587526, |
|
"rewards/accuracy_reward": 0.015625000465661287, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 545.0669708251953, |
|
"epoch": 0.14555633310007, |
|
"grad_norm": 0.0028808764182031155, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.0200892859720625, |
|
"reward_std": 0.018164015375077724, |
|
"rewards/accuracy_reward": 0.0200892859720625, |
|
"step": 26 |
|
}, |
|
{ |
|
"completion_length": 561.1131038665771, |
|
"epoch": 0.15115465360391883, |
|
"grad_norm": 0.004521294496953487, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.043898809934034944, |
|
"reward_std": 0.027857428416609764, |
|
"rewards/accuracy_reward": 0.043898809934034944, |
|
"step": 27 |
|
}, |
|
{ |
|
"completion_length": 573.0468807220459, |
|
"epoch": 0.15675297410776767, |
|
"grad_norm": 0.004434187430888414, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03050595335662365, |
|
"reward_std": 0.03036304796114564, |
|
"rewards/accuracy_reward": 0.03050595335662365, |
|
"step": 28 |
|
}, |
|
{ |
|
"completion_length": 573.6540298461914, |
|
"epoch": 0.16235129461161651, |
|
"grad_norm": 0.0023913481272757053, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03422619198681787, |
|
"reward_std": 0.01964203454554081, |
|
"rewards/accuracy_reward": 0.03422619198681787, |
|
"step": 29 |
|
}, |
|
{ |
|
"completion_length": 582.3474769592285, |
|
"epoch": 0.16794961511546536, |
|
"grad_norm": 0.0027055193204432726, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.027529762592166662, |
|
"reward_std": 0.02218207810074091, |
|
"rewards/accuracy_reward": 0.027529762592166662, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 583.9047756195068, |
|
"epoch": 0.1735479356193142, |
|
"grad_norm": 0.003626331454142928, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03720238181995228, |
|
"reward_std": 0.028277710545808077, |
|
"rewards/accuracy_reward": 0.03720238181995228, |
|
"step": 31 |
|
}, |
|
{ |
|
"completion_length": 576.6346855163574, |
|
"epoch": 0.17914625612316304, |
|
"grad_norm": 0.0032239772845059633, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.015625000291038305, |
|
"reward_std": 0.022000661585479975, |
|
"rewards/accuracy_reward": 0.015625000291038305, |
|
"step": 32 |
|
}, |
|
{ |
|
"completion_length": 568.7916736602783, |
|
"epoch": 0.1847445766270119, |
|
"grad_norm": 0.0029333089478313923, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.025297619693446904, |
|
"reward_std": 0.020201513543725014, |
|
"rewards/accuracy_reward": 0.025297619693446904, |
|
"step": 33 |
|
}, |
|
{ |
|
"completion_length": 555.8154830932617, |
|
"epoch": 0.19034289713086075, |
|
"grad_norm": 0.003082014387473464, |
|
"learning_rate": 3e-07, |
|
"loss": -0.0, |
|
"reward": 0.01264880975941196, |
|
"reward_std": 0.019351367838680744, |
|
"rewards/accuracy_reward": 0.01264880975941196, |
|
"step": 34 |
|
}, |
|
{ |
|
"completion_length": 593.186767578125, |
|
"epoch": 0.1959412176347096, |
|
"grad_norm": 0.003542139893397689, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.029017857741564512, |
|
"reward_std": 0.03906076308339834, |
|
"rewards/accuracy_reward": 0.029017857741564512, |
|
"step": 35 |
|
}, |
|
{ |
|
"completion_length": 539.3273983001709, |
|
"epoch": 0.20153953813855843, |
|
"grad_norm": 0.0035734642297029495, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04092262004269287, |
|
"reward_std": 0.02163945697247982, |
|
"rewards/accuracy_reward": 0.04092262004269287, |
|
"step": 36 |
|
}, |
|
{ |
|
"completion_length": 528.0818519592285, |
|
"epoch": 0.20713785864240727, |
|
"grad_norm": 0.004155021160840988, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04241071513388306, |
|
"reward_std": 0.031058762688189745, |
|
"rewards/accuracy_reward": 0.04241071513388306, |
|
"step": 37 |
|
}, |
|
{ |
|
"completion_length": 572.0044708251953, |
|
"epoch": 0.21273617914625612, |
|
"grad_norm": 0.0043424940668046474, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.031250000349245965, |
|
"reward_std": 0.03391032665967941, |
|
"rewards/accuracy_reward": 0.031250000349245965, |
|
"step": 38 |
|
}, |
|
{ |
|
"completion_length": 561.321439743042, |
|
"epoch": 0.21833449965010496, |
|
"grad_norm": 0.0013517189072445035, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.014136905316263437, |
|
"reward_std": 0.004597577266395092, |
|
"rewards/accuracy_reward": 0.014136905316263437, |
|
"step": 39 |
|
}, |
|
{ |
|
"completion_length": 604.6815567016602, |
|
"epoch": 0.22393282015395383, |
|
"grad_norm": 0.004611727315932512, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04464285826543346, |
|
"reward_std": 0.042770151514559984, |
|
"rewards/accuracy_reward": 0.04464285826543346, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 591.9910755157471, |
|
"epoch": 0.22953114065780267, |
|
"grad_norm": 0.003368583507835865, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03720238246023655, |
|
"reward_std": 0.022582839708775282, |
|
"rewards/accuracy_reward": 0.03720238246023655, |
|
"step": 41 |
|
}, |
|
{ |
|
"completion_length": 585.0193538665771, |
|
"epoch": 0.2351294611616515, |
|
"grad_norm": 0.0034278561361134052, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04315476305782795, |
|
"reward_std": 0.02897424390539527, |
|
"rewards/accuracy_reward": 0.04315476305782795, |
|
"step": 42 |
|
}, |
|
{ |
|
"completion_length": 588.648078918457, |
|
"epoch": 0.24072778166550035, |
|
"grad_norm": 0.003546294756233692, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04092262068297714, |
|
"reward_std": 0.02937500481493771, |
|
"rewards/accuracy_reward": 0.04092262068297714, |
|
"step": 43 |
|
}, |
|
{ |
|
"completion_length": 591.8616180419922, |
|
"epoch": 0.2463261021693492, |
|
"grad_norm": 0.0032863786909729242, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.029017857741564512, |
|
"reward_std": 0.02016195748001337, |
|
"rewards/accuracy_reward": 0.029017857741564512, |
|
"step": 44 |
|
}, |
|
{ |
|
"completion_length": 595.4494113922119, |
|
"epoch": 0.25192442267319803, |
|
"grad_norm": 0.003891779575496912, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.0513392873108387, |
|
"reward_std": 0.021664298605173826, |
|
"rewards/accuracy_reward": 0.0513392873108387, |
|
"step": 45 |
|
}, |
|
{ |
|
"completion_length": 576.5751647949219, |
|
"epoch": 0.2575227431770469, |
|
"grad_norm": 0.004738082177937031, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04092262021731585, |
|
"reward_std": 0.03612527949735522, |
|
"rewards/accuracy_reward": 0.04092262021731585, |
|
"step": 46 |
|
}, |
|
{ |
|
"completion_length": 582.7514915466309, |
|
"epoch": 0.2631210636808957, |
|
"grad_norm": 0.005118933971971273, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.044642857857979834, |
|
"reward_std": 0.02991444803774357, |
|
"rewards/accuracy_reward": 0.044642857857979834, |
|
"step": 47 |
|
}, |
|
{ |
|
"completion_length": 597.6398983001709, |
|
"epoch": 0.26871938418474456, |
|
"grad_norm": 0.003335759276524186, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.017857143247965723, |
|
"reward_std": 0.02291816775687039, |
|
"rewards/accuracy_reward": 0.017857143247965723, |
|
"step": 48 |
|
}, |
|
{ |
|
"completion_length": 632.6994171142578, |
|
"epoch": 0.2743177046885934, |
|
"grad_norm": 0.002710092579945922, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.026785714959260076, |
|
"reward_std": 0.018883246928453445, |
|
"rewards/accuracy_reward": 0.026785714959260076, |
|
"step": 49 |
|
}, |
|
{ |
|
"completion_length": 614.4695072174072, |
|
"epoch": 0.27991602519244224, |
|
"grad_norm": 0.004074351862072945, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.034970239736139774, |
|
"reward_std": 0.0352512919344008, |
|
"rewards/accuracy_reward": 0.034970239736139774, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 564.6369132995605, |
|
"epoch": 0.28551434569629114, |
|
"grad_norm": 0.00433464627712965, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04761904897168279, |
|
"reward_std": 0.036211316008120775, |
|
"rewards/accuracy_reward": 0.04761904897168279, |
|
"step": 51 |
|
}, |
|
{ |
|
"completion_length": 574.7514991760254, |
|
"epoch": 0.29111266620014, |
|
"grad_norm": 0.0021451774518936872, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02232142904540524, |
|
"reward_std": 0.01412591733969748, |
|
"rewards/accuracy_reward": 0.02232142904540524, |
|
"step": 52 |
|
}, |
|
{ |
|
"completion_length": 587.8154850006104, |
|
"epoch": 0.2967109867039888, |
|
"grad_norm": 0.004314142279326916, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04017857281723991, |
|
"reward_std": 0.03161609871312976, |
|
"rewards/accuracy_reward": 0.04017857281723991, |
|
"step": 53 |
|
}, |
|
{ |
|
"completion_length": 612.7529888153076, |
|
"epoch": 0.30230930720783766, |
|
"grad_norm": 0.003322584554553032, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.023809524485841393, |
|
"reward_std": 0.029193072579801083, |
|
"rewards/accuracy_reward": 0.023809524485841393, |
|
"step": 54 |
|
}, |
|
{ |
|
"completion_length": 602.6979274749756, |
|
"epoch": 0.3079076277116865, |
|
"grad_norm": 0.0024285970721393824, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.014136904967017472, |
|
"reward_std": 0.017268173396587372, |
|
"rewards/accuracy_reward": 0.014136904967017472, |
|
"step": 55 |
|
}, |
|
{ |
|
"completion_length": 594.776798248291, |
|
"epoch": 0.31350594821553535, |
|
"grad_norm": 0.002405191073194146, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.031250000931322575, |
|
"reward_std": 0.008076196536421776, |
|
"rewards/accuracy_reward": 0.031250000931322575, |
|
"step": 56 |
|
}, |
|
{ |
|
"completion_length": 569.1354274749756, |
|
"epoch": 0.3191042687193842, |
|
"grad_norm": 0.004245223011821508, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.056547619809862226, |
|
"reward_std": 0.03459409927017987, |
|
"rewards/accuracy_reward": 0.056547619809862226, |
|
"step": 57 |
|
}, |
|
{ |
|
"completion_length": 610.0952472686768, |
|
"epoch": 0.32470258922323303, |
|
"grad_norm": 0.003974903374910355, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.035714287078008056, |
|
"reward_std": 0.02040082309395075, |
|
"rewards/accuracy_reward": 0.035714287078008056, |
|
"step": 58 |
|
}, |
|
{ |
|
"completion_length": 645.4836387634277, |
|
"epoch": 0.33030090972708187, |
|
"grad_norm": 0.0026164394803345203, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.011160714784637094, |
|
"reward_std": 0.011207811534404755, |
|
"rewards/accuracy_reward": 0.011160714784637094, |
|
"step": 59 |
|
}, |
|
{ |
|
"completion_length": 626.8445014953613, |
|
"epoch": 0.3358992302309307, |
|
"grad_norm": 0.0031215217895805836, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.024553572467993945, |
|
"reward_std": 0.02602896187454462, |
|
"rewards/accuracy_reward": 0.024553572467993945, |
|
"step": 60 |
|
}, |
|
{ |
|
"completion_length": 602.254472732544, |
|
"epoch": 0.34149755073477955, |
|
"grad_norm": 0.0025609612930566072, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03273809637175873, |
|
"reward_std": 0.02431014971807599, |
|
"rewards/accuracy_reward": 0.03273809637175873, |
|
"step": 61 |
|
}, |
|
{ |
|
"completion_length": 605.808048248291, |
|
"epoch": 0.3470958712386284, |
|
"grad_norm": 0.0041216155514121056, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.0223214291036129, |
|
"reward_std": 0.027785591781139374, |
|
"rewards/accuracy_reward": 0.0223214291036129, |
|
"step": 62 |
|
}, |
|
{ |
|
"completion_length": 620.4248657226562, |
|
"epoch": 0.35269419174247724, |
|
"grad_norm": 0.0027605677023530006, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.020089286204893142, |
|
"reward_std": 0.02404030319303274, |
|
"rewards/accuracy_reward": 0.020089286204893142, |
|
"step": 63 |
|
}, |
|
{ |
|
"completion_length": 616.587064743042, |
|
"epoch": 0.3582925122463261, |
|
"grad_norm": 0.006433432921767235, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.034970238571986556, |
|
"reward_std": 0.0240207826718688, |
|
"rewards/accuracy_reward": 0.034970238571986556, |
|
"step": 64 |
|
}, |
|
{ |
|
"completion_length": 656.1890068054199, |
|
"epoch": 0.363890832750175, |
|
"grad_norm": 0.002678812015801668, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.038690476736519486, |
|
"reward_std": 0.026529580354690552, |
|
"rewards/accuracy_reward": 0.038690476736519486, |
|
"step": 65 |
|
}, |
|
{ |
|
"completion_length": 615.4241199493408, |
|
"epoch": 0.3694891532540238, |
|
"grad_norm": 0.004133144393563271, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.055059525300748646, |
|
"reward_std": 0.030434885527938604, |
|
"rewards/accuracy_reward": 0.055059525300748646, |
|
"step": 66 |
|
}, |
|
{ |
|
"completion_length": 631.1212978363037, |
|
"epoch": 0.37508747375787266, |
|
"grad_norm": 0.004037661012262106, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.038690477376803756, |
|
"reward_std": 0.028071781154721975, |
|
"rewards/accuracy_reward": 0.038690477376803756, |
|
"step": 67 |
|
}, |
|
{ |
|
"completion_length": 577.1934661865234, |
|
"epoch": 0.3806857942617215, |
|
"grad_norm": 0.0031959640327841043, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.023809524427633733, |
|
"reward_std": 0.0161665934138, |
|
"rewards/accuracy_reward": 0.023809524427633733, |
|
"step": 68 |
|
}, |
|
{ |
|
"completion_length": 645.9285850524902, |
|
"epoch": 0.38628411476557034, |
|
"grad_norm": 0.004407494328916073, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.030505953065585345, |
|
"reward_std": 0.03433060785755515, |
|
"rewards/accuracy_reward": 0.030505953065585345, |
|
"step": 69 |
|
}, |
|
{ |
|
"completion_length": 629.8266506195068, |
|
"epoch": 0.3918824352694192, |
|
"grad_norm": 0.0024215257726609707, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.019345238513778895, |
|
"reward_std": 0.02312279725447297, |
|
"rewards/accuracy_reward": 0.019345238513778895, |
|
"step": 70 |
|
}, |
|
{ |
|
"completion_length": 604.0178699493408, |
|
"epoch": 0.397480755773268, |
|
"grad_norm": 0.003344905562698841, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02306547691114247, |
|
"reward_std": 0.019507942255586386, |
|
"rewards/accuracy_reward": 0.02306547691114247, |
|
"step": 71 |
|
}, |
|
{ |
|
"completion_length": 590.7827472686768, |
|
"epoch": 0.40307907627711687, |
|
"grad_norm": 0.0028942637145519257, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05803571501746774, |
|
"reward_std": 0.03128055343404412, |
|
"rewards/accuracy_reward": 0.05803571501746774, |
|
"step": 72 |
|
}, |
|
{ |
|
"completion_length": 599.7842330932617, |
|
"epoch": 0.4086773967809657, |
|
"grad_norm": 0.0034949486143887043, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05580357339931652, |
|
"reward_std": 0.015248052775859833, |
|
"rewards/accuracy_reward": 0.05580357339931652, |
|
"step": 73 |
|
}, |
|
{ |
|
"completion_length": 635.7009048461914, |
|
"epoch": 0.41427571728481455, |
|
"grad_norm": 0.0022625280544161797, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02678571466822177, |
|
"reward_std": 0.021662155631929636, |
|
"rewards/accuracy_reward": 0.02678571466822177, |
|
"step": 74 |
|
}, |
|
{ |
|
"completion_length": 631.5788726806641, |
|
"epoch": 0.4198740377886634, |
|
"grad_norm": 0.0037094622384756804, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02604166726814583, |
|
"reward_std": 0.02264805557206273, |
|
"rewards/accuracy_reward": 0.02604166726814583, |
|
"step": 75 |
|
}, |
|
{ |
|
"completion_length": 636.7358722686768, |
|
"epoch": 0.42547235829251223, |
|
"grad_norm": 0.003503567073494196, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03273809503298253, |
|
"reward_std": 0.02112219762057066, |
|
"rewards/accuracy_reward": 0.03273809503298253, |
|
"step": 76 |
|
}, |
|
{ |
|
"completion_length": 635.9628219604492, |
|
"epoch": 0.4310706787963611, |
|
"grad_norm": 0.0047474331222474575, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04166666779201478, |
|
"reward_std": 0.03789610881358385, |
|
"rewards/accuracy_reward": 0.04166666779201478, |
|
"step": 77 |
|
}, |
|
{ |
|
"completion_length": 638.1703987121582, |
|
"epoch": 0.4366689993002099, |
|
"grad_norm": 0.0018300635274499655, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05282738315872848, |
|
"reward_std": 0.01760453707538545, |
|
"rewards/accuracy_reward": 0.05282738315872848, |
|
"step": 78 |
|
}, |
|
{ |
|
"completion_length": 617.7492637634277, |
|
"epoch": 0.44226731980405876, |
|
"grad_norm": 0.0036325210239738226, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04092262010090053, |
|
"reward_std": 0.03621981432661414, |
|
"rewards/accuracy_reward": 0.04092262010090053, |
|
"step": 79 |
|
}, |
|
{ |
|
"completion_length": 621.1123561859131, |
|
"epoch": 0.44786564030790765, |
|
"grad_norm": 0.0025254676584154367, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04092262004269287, |
|
"reward_std": 0.02054001996293664, |
|
"rewards/accuracy_reward": 0.04092262004269287, |
|
"step": 80 |
|
}, |
|
{ |
|
"completion_length": 670.346004486084, |
|
"epoch": 0.4534639608117565, |
|
"grad_norm": 0.0038352429401129484, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03497023903764784, |
|
"reward_std": 0.03099754173308611, |
|
"rewards/accuracy_reward": 0.03497023903764784, |
|
"step": 81 |
|
}, |
|
{ |
|
"completion_length": 632.6949424743652, |
|
"epoch": 0.45906228131560534, |
|
"grad_norm": 0.005097648594528437, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05059524026000872, |
|
"reward_std": 0.02744494192302227, |
|
"rewards/accuracy_reward": 0.05059524026000872, |
|
"step": 82 |
|
}, |
|
{ |
|
"completion_length": 630.6183204650879, |
|
"epoch": 0.4646606018194542, |
|
"grad_norm": 0.0024996348656713963, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.012648809934034944, |
|
"reward_std": 0.014683252666145563, |
|
"rewards/accuracy_reward": 0.012648809934034944, |
|
"step": 83 |
|
}, |
|
{ |
|
"completion_length": 582.6726245880127, |
|
"epoch": 0.470258922323303, |
|
"grad_norm": 0.0026008612476289272, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.037202382169198245, |
|
"reward_std": 0.021662155631929636, |
|
"rewards/accuracy_reward": 0.037202382169198245, |
|
"step": 84 |
|
}, |
|
{ |
|
"completion_length": 665.8355827331543, |
|
"epoch": 0.47585724282715186, |
|
"grad_norm": 0.002596538746729493, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04389881080714986, |
|
"reward_std": 0.02897106483578682, |
|
"rewards/accuracy_reward": 0.04389881080714986, |
|
"step": 85 |
|
}, |
|
{ |
|
"completion_length": 635.4821529388428, |
|
"epoch": 0.4814555633310007, |
|
"grad_norm": 0.0027116115670651197, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.024553571769502014, |
|
"reward_std": 0.025856829015538096, |
|
"rewards/accuracy_reward": 0.024553571769502014, |
|
"step": 86 |
|
}, |
|
{ |
|
"completion_length": 642.225456237793, |
|
"epoch": 0.48705388383484954, |
|
"grad_norm": 0.00335879810154438, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.0535714304423891, |
|
"reward_std": 0.04018289828673005, |
|
"rewards/accuracy_reward": 0.0535714304423891, |
|
"step": 87 |
|
}, |
|
{ |
|
"completion_length": 611.2046222686768, |
|
"epoch": 0.4926522043386984, |
|
"grad_norm": 0.004874168895184994, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.06994047714397311, |
|
"reward_std": 0.030564499087631702, |
|
"rewards/accuracy_reward": 0.06994047714397311, |
|
"step": 88 |
|
}, |
|
{ |
|
"completion_length": 634.81325340271, |
|
"epoch": 0.4982505248425472, |
|
"grad_norm": 0.0030458923429250717, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03273809637175873, |
|
"reward_std": 0.025404266081750393, |
|
"rewards/accuracy_reward": 0.03273809637175873, |
|
"step": 89 |
|
}, |
|
{ |
|
"completion_length": 606.618314743042, |
|
"epoch": 0.5038488453463961, |
|
"grad_norm": 0.0029052915051579475, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.034226191812194884, |
|
"reward_std": 0.018181392922997475, |
|
"rewards/accuracy_reward": 0.034226191812194884, |
|
"step": 90 |
|
}, |
|
{ |
|
"completion_length": 649.934534072876, |
|
"epoch": 0.509447165850245, |
|
"grad_norm": 0.0023663390893489122, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.028273810050450265, |
|
"reward_std": 0.014822450000792742, |
|
"rewards/accuracy_reward": 0.028273810050450265, |
|
"step": 91 |
|
}, |
|
{ |
|
"completion_length": 645.944206237793, |
|
"epoch": 0.5150454863540938, |
|
"grad_norm": 0.002700702054426074, |
|
"learning_rate": 3e-07, |
|
"loss": -0.0, |
|
"reward": 0.026041666977107525, |
|
"reward_std": 0.023188014514744282, |
|
"rewards/accuracy_reward": 0.026041666977107525, |
|
"step": 92 |
|
}, |
|
{ |
|
"completion_length": 606.7001571655273, |
|
"epoch": 0.5206438068579426, |
|
"grad_norm": 0.002376874443143606, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02455357206054032, |
|
"reward_std": 0.02123525319620967, |
|
"rewards/accuracy_reward": 0.02455357206054032, |
|
"step": 93 |
|
}, |
|
{ |
|
"completion_length": 635.0580425262451, |
|
"epoch": 0.5262421273617914, |
|
"grad_norm": 0.0022008493542671204, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02678571513388306, |
|
"reward_std": 0.016507241874933243, |
|
"rewards/accuracy_reward": 0.02678571513388306, |
|
"step": 94 |
|
}, |
|
{ |
|
"completion_length": 651.9226360321045, |
|
"epoch": 0.5318404478656403, |
|
"grad_norm": 0.003217194229364395, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04464285826543346, |
|
"reward_std": 0.018883246928453445, |
|
"rewards/accuracy_reward": 0.04464285826543346, |
|
"step": 95 |
|
}, |
|
{ |
|
"completion_length": 621.9144458770752, |
|
"epoch": 0.5374387683694891, |
|
"grad_norm": 0.0026905853301286697, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04166666849050671, |
|
"reward_std": 0.022358688060194254, |
|
"rewards/accuracy_reward": 0.04166666849050671, |
|
"step": 96 |
|
}, |
|
{ |
|
"completion_length": 630.8422737121582, |
|
"epoch": 0.543037088873338, |
|
"grad_norm": 0.00430277269333601, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03422619169577956, |
|
"reward_std": 0.03490746580064297, |
|
"rewards/accuracy_reward": 0.03422619169577956, |
|
"step": 97 |
|
}, |
|
{ |
|
"completion_length": 635.1577453613281, |
|
"epoch": 0.5486354093771868, |
|
"grad_norm": 0.0027177336160093546, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.024553572409786284, |
|
"reward_std": 0.01907807867974043, |
|
"rewards/accuracy_reward": 0.024553572409786284, |
|
"step": 98 |
|
}, |
|
{ |
|
"completion_length": 621.9509105682373, |
|
"epoch": 0.5542337298810357, |
|
"grad_norm": 0.0029670181684195995, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04761904920451343, |
|
"reward_std": 0.024105519521981478, |
|
"rewards/accuracy_reward": 0.04761904920451343, |
|
"step": 99 |
|
}, |
|
{ |
|
"completion_length": 645.11012840271, |
|
"epoch": 0.5598320503848845, |
|
"grad_norm": 0.004191585350781679, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03720238246023655, |
|
"reward_std": 0.027021698653697968, |
|
"rewards/accuracy_reward": 0.03720238246023655, |
|
"step": 100 |
|
}, |
|
{ |
|
"completion_length": 628.647331237793, |
|
"epoch": 0.5654303708887334, |
|
"grad_norm": 0.005200605373829603, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04017857293365523, |
|
"reward_std": 0.032400546595454216, |
|
"rewards/accuracy_reward": 0.04017857293365523, |
|
"step": 101 |
|
}, |
|
{ |
|
"completion_length": 674.1391506195068, |
|
"epoch": 0.5710286913925823, |
|
"grad_norm": 0.00332686142064631, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.039434525242540985, |
|
"reward_std": 0.02585682924836874, |
|
"rewards/accuracy_reward": 0.039434525242540985, |
|
"step": 102 |
|
}, |
|
{ |
|
"completion_length": 635.6376628875732, |
|
"epoch": 0.5766270118964311, |
|
"grad_norm": 0.002928570844233036, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03348214377183467, |
|
"reward_std": 0.017823366448283195, |
|
"rewards/accuracy_reward": 0.03348214377183467, |
|
"step": 103 |
|
}, |
|
{ |
|
"completion_length": 667.8132553100586, |
|
"epoch": 0.58222533240028, |
|
"grad_norm": 0.0027422241400927305, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05059524020180106, |
|
"reward_std": 0.02381271030753851, |
|
"rewards/accuracy_reward": 0.05059524020180106, |
|
"step": 104 |
|
}, |
|
{ |
|
"completion_length": 607.5372142791748, |
|
"epoch": 0.5878236529041287, |
|
"grad_norm": 0.0026503645349293947, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.023809524660464376, |
|
"reward_std": 0.021662155631929636, |
|
"rewards/accuracy_reward": 0.023809524660464376, |
|
"step": 105 |
|
}, |
|
{ |
|
"completion_length": 622.7715854644775, |
|
"epoch": 0.5934219734079776, |
|
"grad_norm": 0.003517127363011241, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04836309718666598, |
|
"reward_std": 0.03329852968454361, |
|
"rewards/accuracy_reward": 0.04836309718666598, |
|
"step": 106 |
|
}, |
|
{ |
|
"completion_length": 647.0959930419922, |
|
"epoch": 0.5990202939118264, |
|
"grad_norm": 0.0036508163902908564, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.061755954287946224, |
|
"reward_std": 0.03932873113080859, |
|
"rewards/accuracy_reward": 0.061755954287946224, |
|
"step": 107 |
|
}, |
|
{ |
|
"completion_length": 623.8727836608887, |
|
"epoch": 0.6046186144156753, |
|
"grad_norm": 0.0034824141766875982, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05282738321693614, |
|
"reward_std": 0.023543079383671284, |
|
"rewards/accuracy_reward": 0.05282738321693614, |
|
"step": 108 |
|
}, |
|
{ |
|
"completion_length": 646.765645980835, |
|
"epoch": 0.6102169349195241, |
|
"grad_norm": 0.0026973283383995295, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.017113095498643816, |
|
"reward_std": 0.017985261976718903, |
|
"rewards/accuracy_reward": 0.017113095498643816, |
|
"step": 109 |
|
}, |
|
{ |
|
"completion_length": 625.6376571655273, |
|
"epoch": 0.615815255423373, |
|
"grad_norm": 0.0037007054779678583, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.06547619227785617, |
|
"reward_std": 0.040159355383366346, |
|
"rewards/accuracy_reward": 0.06547619227785617, |
|
"step": 110 |
|
}, |
|
{ |
|
"completion_length": 657.3631019592285, |
|
"epoch": 0.6214135759272218, |
|
"grad_norm": 0.0027858198154717684, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.027529762883204967, |
|
"reward_std": 0.016143894754350185, |
|
"rewards/accuracy_reward": 0.027529762883204967, |
|
"step": 111 |
|
}, |
|
{ |
|
"completion_length": 627.3422756195068, |
|
"epoch": 0.6270118964310707, |
|
"grad_norm": 0.0035545658320188522, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05059523967793211, |
|
"reward_std": 0.03195094550028443, |
|
"rewards/accuracy_reward": 0.05059523967793211, |
|
"step": 112 |
|
}, |
|
{ |
|
"completion_length": 629.8177185058594, |
|
"epoch": 0.6326102169349195, |
|
"grad_norm": 0.0040982505306601524, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05357142956927419, |
|
"reward_std": 0.03337568882852793, |
|
"rewards/accuracy_reward": 0.05357142956927419, |
|
"step": 113 |
|
}, |
|
{ |
|
"completion_length": 650.6034278869629, |
|
"epoch": 0.6382085374387684, |
|
"grad_norm": 0.0028056029696017504, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05208333529299125, |
|
"reward_std": 0.01970939477905631, |
|
"rewards/accuracy_reward": 0.05208333529299125, |
|
"step": 114 |
|
}, |
|
{ |
|
"completion_length": 649.5796318054199, |
|
"epoch": 0.6438068579426172, |
|
"grad_norm": 0.0023333376739174128, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.014880952483508736, |
|
"reward_std": 0.010090996511280537, |
|
"rewards/accuracy_reward": 0.014880952483508736, |
|
"step": 115 |
|
}, |
|
{ |
|
"completion_length": 645.5312652587891, |
|
"epoch": 0.6494051784464661, |
|
"grad_norm": 0.0031304731965065002, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.022321428870782256, |
|
"reward_std": 0.015947763342410326, |
|
"rewards/accuracy_reward": 0.022321428870782256, |
|
"step": 116 |
|
}, |
|
{ |
|
"completion_length": 606.377986907959, |
|
"epoch": 0.655003498950315, |
|
"grad_norm": 0.004076777026057243, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.030505952949170023, |
|
"reward_std": 0.028484483249485493, |
|
"rewards/accuracy_reward": 0.030505952949170023, |
|
"step": 117 |
|
}, |
|
{ |
|
"completion_length": 633.9389953613281, |
|
"epoch": 0.6606018194541637, |
|
"grad_norm": 0.0019400623859837651, |
|
"learning_rate": 3e-07, |
|
"loss": -0.0, |
|
"reward": 0.01934523874660954, |
|
"reward_std": 0.014487121719866991, |
|
"rewards/accuracy_reward": 0.01934523874660954, |
|
"step": 118 |
|
}, |
|
{ |
|
"completion_length": 626.8236713409424, |
|
"epoch": 0.6662001399580126, |
|
"grad_norm": 0.0019256924279034138, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.040178572409786284, |
|
"reward_std": 0.013790588825941086, |
|
"rewards/accuracy_reward": 0.040178572409786284, |
|
"step": 119 |
|
}, |
|
{ |
|
"completion_length": 633.7790336608887, |
|
"epoch": 0.6717984604618614, |
|
"grad_norm": 0.0031517883762717247, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04092261998448521, |
|
"reward_std": 0.022219491191208363, |
|
"rewards/accuracy_reward": 0.04092261998448521, |
|
"step": 120 |
|
}, |
|
{ |
|
"completion_length": 648.1837844848633, |
|
"epoch": 0.6773967809657103, |
|
"grad_norm": 0.0026536902878433466, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.032738096197135746, |
|
"reward_std": 0.02146284654736519, |
|
"rewards/accuracy_reward": 0.032738096197135746, |
|
"step": 121 |
|
}, |
|
{ |
|
"completion_length": 652.9933204650879, |
|
"epoch": 0.6829951014695591, |
|
"grad_norm": 0.0033722377847880125, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.023809524544049054, |
|
"reward_std": 0.020562718622386456, |
|
"rewards/accuracy_reward": 0.023809524544049054, |
|
"step": 122 |
|
}, |
|
{ |
|
"completion_length": 656.3534355163574, |
|
"epoch": 0.688593421973408, |
|
"grad_norm": 0.002776005771011114, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.034970239794347435, |
|
"reward_std": 0.01705795805901289, |
|
"rewards/accuracy_reward": 0.034970239794347435, |
|
"step": 123 |
|
}, |
|
{ |
|
"completion_length": 610.5788822174072, |
|
"epoch": 0.6941917424772568, |
|
"grad_norm": 0.0037796536926180124, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.07142857392318547, |
|
"reward_std": 0.03704408532939851, |
|
"rewards/accuracy_reward": 0.07142857392318547, |
|
"step": 124 |
|
}, |
|
{ |
|
"completion_length": 639.4814128875732, |
|
"epoch": 0.6997900629811057, |
|
"grad_norm": 0.003291892819106579, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05133928789291531, |
|
"reward_std": 0.033701435662806034, |
|
"rewards/accuracy_reward": 0.05133928789291531, |
|
"step": 125 |
|
}, |
|
{ |
|
"completion_length": 627.3214416503906, |
|
"epoch": 0.7053883834849545, |
|
"grad_norm": 0.002914518816396594, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.0386904776096344, |
|
"reward_std": 0.02166215470060706, |
|
"rewards/accuracy_reward": 0.0386904776096344, |
|
"step": 126 |
|
}, |
|
{ |
|
"completion_length": 614.4308052062988, |
|
"epoch": 0.7109867039888034, |
|
"grad_norm": 0.003878154093399644, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03348214388824999, |
|
"reward_std": 0.018164014909416437, |
|
"rewards/accuracy_reward": 0.03348214388824999, |
|
"step": 127 |
|
}, |
|
{ |
|
"completion_length": 637.3244171142578, |
|
"epoch": 0.7165850244926522, |
|
"grad_norm": 0.002592942677438259, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.026785714901052415, |
|
"reward_std": 0.01852204231545329, |
|
"rewards/accuracy_reward": 0.026785714901052415, |
|
"step": 128 |
|
}, |
|
{ |
|
"completion_length": 662.0282936096191, |
|
"epoch": 0.722183344996501, |
|
"grad_norm": 0.0030688210390508175, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.0424107150756754, |
|
"reward_std": 0.029373969649896026, |
|
"rewards/accuracy_reward": 0.0424107150756754, |
|
"step": 129 |
|
}, |
|
{ |
|
"completion_length": 609.4747123718262, |
|
"epoch": 0.72778166550035, |
|
"grad_norm": 0.0026889187283813953, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03869047720218077, |
|
"reward_std": 0.021642634645104408, |
|
"rewards/accuracy_reward": 0.03869047720218077, |
|
"step": 130 |
|
}, |
|
{ |
|
"completion_length": 628.831859588623, |
|
"epoch": 0.7333799860041987, |
|
"grad_norm": 0.0032478254288434982, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.019345238571986556, |
|
"reward_std": 0.01975191291421652, |
|
"rewards/accuracy_reward": 0.019345238571986556, |
|
"step": 131 |
|
}, |
|
{ |
|
"completion_length": 642.3846893310547, |
|
"epoch": 0.7389783065080476, |
|
"grad_norm": 0.0019529862329363823, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.01116071455180645, |
|
"reward_std": 0.013233252801001072, |
|
"rewards/accuracy_reward": 0.01116071455180645, |
|
"step": 132 |
|
}, |
|
{ |
|
"completion_length": 634.9181728363037, |
|
"epoch": 0.7445766270118964, |
|
"grad_norm": 0.004057868849486113, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.031250000873114914, |
|
"reward_std": 0.040879431180655956, |
|
"rewards/accuracy_reward": 0.031250000873114914, |
|
"step": 133 |
|
}, |
|
{ |
|
"completion_length": 609.6331977844238, |
|
"epoch": 0.7501749475157453, |
|
"grad_norm": 0.003426521783694625, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.038690477260388434, |
|
"reward_std": 0.029601082671433687, |
|
"rewards/accuracy_reward": 0.038690477260388434, |
|
"step": 134 |
|
}, |
|
{ |
|
"completion_length": 639.4613227844238, |
|
"epoch": 0.7557732680195941, |
|
"grad_norm": 0.003970231860876083, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.054315477260388434, |
|
"reward_std": 0.029604259878396988, |
|
"rewards/accuracy_reward": 0.054315477260388434, |
|
"step": 135 |
|
}, |
|
{ |
|
"completion_length": 614.8199501037598, |
|
"epoch": 0.761371588523443, |
|
"grad_norm": 0.0031252307817339897, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05654762062476948, |
|
"reward_std": 0.02807308081537485, |
|
"rewards/accuracy_reward": 0.05654762062476948, |
|
"step": 136 |
|
}, |
|
{ |
|
"completion_length": 640.7924194335938, |
|
"epoch": 0.7669699090272918, |
|
"grad_norm": 0.0020789685659110546, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.019345238397363573, |
|
"reward_std": 0.015046600718051195, |
|
"rewards/accuracy_reward": 0.019345238397363573, |
|
"step": 137 |
|
}, |
|
{ |
|
"completion_length": 640.4226322174072, |
|
"epoch": 0.7725682295311407, |
|
"grad_norm": 0.0019399580778554082, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.012648809934034944, |
|
"reward_std": 0.013656496535986662, |
|
"rewards/accuracy_reward": 0.012648809934034944, |
|
"step": 138 |
|
}, |
|
{ |
|
"completion_length": 594.6205444335938, |
|
"epoch": 0.7781665500349895, |
|
"grad_norm": 0.0063768248073756695, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.052083334885537624, |
|
"reward_std": 0.023117476608604193, |
|
"rewards/accuracy_reward": 0.052083334885537624, |
|
"step": 139 |
|
}, |
|
{ |
|
"completion_length": 613.655517578125, |
|
"epoch": 0.7837648705388384, |
|
"grad_norm": 0.0031261774711310863, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.039434525300748646, |
|
"reward_std": 0.020764170680195093, |
|
"rewards/accuracy_reward": 0.039434525300748646, |
|
"step": 140 |
|
}, |
|
{ |
|
"completion_length": 645.6837902069092, |
|
"epoch": 0.7893631910426872, |
|
"grad_norm": 0.0014134430093690753, |
|
"learning_rate": 3e-07, |
|
"loss": -0.0, |
|
"reward": 0.02157738187815994, |
|
"reward_std": 0.007513539865612984, |
|
"rewards/accuracy_reward": 0.02157738187815994, |
|
"step": 141 |
|
}, |
|
{ |
|
"completion_length": 630.043176651001, |
|
"epoch": 0.794961511546536, |
|
"grad_norm": 0.004378010053187609, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04241071571595967, |
|
"reward_std": 0.03774271160364151, |
|
"rewards/accuracy_reward": 0.04241071571595967, |
|
"step": 142 |
|
}, |
|
{ |
|
"completion_length": 622.1555137634277, |
|
"epoch": 0.8005598320503848, |
|
"grad_norm": 0.0029741383623331785, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.00892857153667137, |
|
"reward_std": 0.016726072411984205, |
|
"rewards/accuracy_reward": 0.00892857153667137, |
|
"step": 143 |
|
}, |
|
{ |
|
"completion_length": 646.7626647949219, |
|
"epoch": 0.8061581525542337, |
|
"grad_norm": 0.0028938716277480125, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.021577381528913975, |
|
"reward_std": 0.022778970655053854, |
|
"rewards/accuracy_reward": 0.021577381528913975, |
|
"step": 144 |
|
}, |
|
{ |
|
"completion_length": 651.0178699493408, |
|
"epoch": 0.8117564730580826, |
|
"grad_norm": 0.003098010318353772, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.023809524718672037, |
|
"reward_std": 0.00955103849992156, |
|
"rewards/accuracy_reward": 0.023809524718672037, |
|
"step": 145 |
|
}, |
|
{ |
|
"completion_length": 655.3891487121582, |
|
"epoch": 0.8173547935619314, |
|
"grad_norm": 0.002411956200376153, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.028273810166865587, |
|
"reward_std": 0.024912146851420403, |
|
"rewards/accuracy_reward": 0.028273810166865587, |
|
"step": 146 |
|
}, |
|
{ |
|
"completion_length": 660.3393001556396, |
|
"epoch": 0.8229531140657803, |
|
"grad_norm": 0.0024566147476434708, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03199404838960618, |
|
"reward_std": 0.021460703574121, |
|
"rewards/accuracy_reward": 0.03199404838960618, |
|
"step": 147 |
|
}, |
|
{ |
|
"completion_length": 660.4471855163574, |
|
"epoch": 0.8285514345696291, |
|
"grad_norm": 0.0033500257413834333, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03199404838960618, |
|
"reward_std": 0.026453721337020397, |
|
"rewards/accuracy_reward": 0.03199404838960618, |
|
"step": 148 |
|
}, |
|
{ |
|
"completion_length": 616.9553737640381, |
|
"epoch": 0.834149755073478, |
|
"grad_norm": 0.0035977945663034916, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04613095358945429, |
|
"reward_std": 0.03803316270932555, |
|
"rewards/accuracy_reward": 0.04613095358945429, |
|
"step": 149 |
|
}, |
|
{ |
|
"completion_length": 624.0811023712158, |
|
"epoch": 0.8397480755773268, |
|
"grad_norm": 0.0024934441316872835, |
|
"learning_rate": 3e-07, |
|
"loss": -0.0, |
|
"reward": 0.020089286321308464, |
|
"reward_std": 0.014129094779491425, |
|
"rewards/accuracy_reward": 0.020089286321308464, |
|
"step": 150 |
|
}, |
|
{ |
|
"completion_length": 649.3125076293945, |
|
"epoch": 0.8453463960811757, |
|
"grad_norm": 0.0040410468354821205, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.08035714528523386, |
|
"reward_std": 0.037069959565997124, |
|
"rewards/accuracy_reward": 0.08035714528523386, |
|
"step": 151 |
|
}, |
|
{ |
|
"completion_length": 655.87575340271, |
|
"epoch": 0.8509447165850245, |
|
"grad_norm": 0.0036378325894474983, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.020089286554139107, |
|
"reward_std": 0.025742772268131375, |
|
"rewards/accuracy_reward": 0.020089286554139107, |
|
"step": 152 |
|
}, |
|
{ |
|
"completion_length": 608.526050567627, |
|
"epoch": 0.8565430370888734, |
|
"grad_norm": 0.0019003109773620963, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.019345238339155912, |
|
"reward_std": 0.013455044478178024, |
|
"rewards/accuracy_reward": 0.019345238339155912, |
|
"step": 153 |
|
}, |
|
{ |
|
"completion_length": 605.8340892791748, |
|
"epoch": 0.8621413575927221, |
|
"grad_norm": 0.002737295813858509, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04538690613117069, |
|
"reward_std": 0.025694933719933033, |
|
"rewards/accuracy_reward": 0.04538690613117069, |
|
"step": 154 |
|
}, |
|
{ |
|
"completion_length": 649.8913841247559, |
|
"epoch": 0.867739678096571, |
|
"grad_norm": 0.0013863355852663517, |
|
"learning_rate": 3e-07, |
|
"loss": -0.0, |
|
"reward": 0.012648810050450265, |
|
"reward_std": 0.007513539865612984, |
|
"rewards/accuracy_reward": 0.012648810050450265, |
|
"step": 155 |
|
}, |
|
{ |
|
"completion_length": 649.567720413208, |
|
"epoch": 0.8733379986004198, |
|
"grad_norm": 0.003731328761205077, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.020089286379516125, |
|
"reward_std": 0.022558840923011303, |
|
"rewards/accuracy_reward": 0.020089286379516125, |
|
"step": 156 |
|
}, |
|
{ |
|
"completion_length": 661.0640029907227, |
|
"epoch": 0.8789363191042687, |
|
"grad_norm": 0.0034664925187826157, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05505952559178695, |
|
"reward_std": 0.035452743992209435, |
|
"rewards/accuracy_reward": 0.05505952559178695, |
|
"step": 157 |
|
}, |
|
{ |
|
"completion_length": 616.1681728363037, |
|
"epoch": 0.8845346396081175, |
|
"grad_norm": 0.003077705856412649, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.020833333721384406, |
|
"reward_std": 0.023389466106891632, |
|
"rewards/accuracy_reward": 0.020833333721384406, |
|
"step": 158 |
|
}, |
|
{ |
|
"completion_length": 645.6845321655273, |
|
"epoch": 0.8901329601119664, |
|
"grad_norm": 0.0034521420020610094, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02380952425301075, |
|
"reward_std": 0.01677391119301319, |
|
"rewards/accuracy_reward": 0.02380952425301075, |
|
"step": 159 |
|
}, |
|
{ |
|
"completion_length": 621.7961502075195, |
|
"epoch": 0.8957312806158153, |
|
"grad_norm": 0.0036637301091104746, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04092262004269287, |
|
"reward_std": 0.02751574432477355, |
|
"rewards/accuracy_reward": 0.04092262004269287, |
|
"step": 160 |
|
}, |
|
{ |
|
"completion_length": 661.4003067016602, |
|
"epoch": 0.9013296011196641, |
|
"grad_norm": 0.0030420024413615465, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.031250000989530236, |
|
"reward_std": 0.022196792997419834, |
|
"rewards/accuracy_reward": 0.031250000989530236, |
|
"step": 161 |
|
}, |
|
{ |
|
"completion_length": 636.8690567016602, |
|
"epoch": 0.906927921623513, |
|
"grad_norm": 0.003414291888475418, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05357143084984273, |
|
"reward_std": 0.021321506705135107, |
|
"rewards/accuracy_reward": 0.05357143084984273, |
|
"step": 162 |
|
}, |
|
{ |
|
"completion_length": 641.8973331451416, |
|
"epoch": 0.9125262421273618, |
|
"grad_norm": 0.0033480250276625156, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03720238176174462, |
|
"reward_std": 0.029198394622653723, |
|
"rewards/accuracy_reward": 0.03720238176174462, |
|
"step": 163 |
|
}, |
|
{ |
|
"completion_length": 623.6264972686768, |
|
"epoch": 0.9181245626312107, |
|
"grad_norm": 0.003853818401694298, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05729166802484542, |
|
"reward_std": 0.037587220780551434, |
|
"rewards/accuracy_reward": 0.05729166802484542, |
|
"step": 164 |
|
}, |
|
{ |
|
"completion_length": 649.7075996398926, |
|
"epoch": 0.9237228831350595, |
|
"grad_norm": 0.0024723373353481293, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.019345238513778895, |
|
"reward_std": 0.01928615104407072, |
|
"rewards/accuracy_reward": 0.019345238513778895, |
|
"step": 165 |
|
}, |
|
{ |
|
"completion_length": 680.0818481445312, |
|
"epoch": 0.9293212036389084, |
|
"grad_norm": 0.00249727675691247, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03869047743501142, |
|
"reward_std": 0.015805388800799847, |
|
"rewards/accuracy_reward": 0.03869047743501142, |
|
"step": 166 |
|
}, |
|
{ |
|
"completion_length": 606.4137096405029, |
|
"epoch": 0.9349195241427571, |
|
"grad_norm": 0.0031826056074351072, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.04166666814126074, |
|
"reward_std": 0.02047350350767374, |
|
"rewards/accuracy_reward": 0.04166666814126074, |
|
"step": 167 |
|
}, |
|
{ |
|
"completion_length": 620.4821529388428, |
|
"epoch": 0.940517844646606, |
|
"grad_norm": 0.0025734296068549156, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.02380952431121841, |
|
"reward_std": 0.0181671935133636, |
|
"rewards/accuracy_reward": 0.02380952431121841, |
|
"step": 168 |
|
}, |
|
{ |
|
"completion_length": 617.4836406707764, |
|
"epoch": 0.9461161651504548, |
|
"grad_norm": 0.0036193837877362967, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03199404873885214, |
|
"reward_std": 0.018744049593806267, |
|
"rewards/accuracy_reward": 0.03199404873885214, |
|
"step": 169 |
|
}, |
|
{ |
|
"completion_length": 630.1562633514404, |
|
"epoch": 0.9517144856543037, |
|
"grad_norm": 0.0025704463478177786, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03497023897944018, |
|
"reward_std": 0.01300378143787384, |
|
"rewards/accuracy_reward": 0.03497023897944018, |
|
"step": 170 |
|
}, |
|
{ |
|
"completion_length": 608.0171241760254, |
|
"epoch": 0.9573128061581525, |
|
"grad_norm": 0.003403712995350361, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05357143055880442, |
|
"reward_std": 0.030789734097197652, |
|
"rewards/accuracy_reward": 0.05357143055880442, |
|
"step": 171 |
|
}, |
|
{ |
|
"completion_length": 624.5669803619385, |
|
"epoch": 0.9629111266620014, |
|
"grad_norm": 0.00244723167270422, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.018601191113702953, |
|
"reward_std": 0.014688573777675629, |
|
"rewards/accuracy_reward": 0.018601191113702953, |
|
"step": 172 |
|
}, |
|
{ |
|
"completion_length": 619.2753105163574, |
|
"epoch": 0.9685094471658502, |
|
"grad_norm": 0.003216799348592758, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.015625000465661287, |
|
"reward_std": 0.011909665539860725, |
|
"rewards/accuracy_reward": 0.015625000465661287, |
|
"step": 173 |
|
}, |
|
{ |
|
"completion_length": 603.6629619598389, |
|
"epoch": 0.9741077676696991, |
|
"grad_norm": 0.004921557381749153, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.1026785732829012, |
|
"reward_std": 0.035813949070870876, |
|
"rewards/accuracy_reward": 0.1026785732829012, |
|
"step": 174 |
|
}, |
|
{ |
|
"completion_length": 619.9709911346436, |
|
"epoch": 0.979706088173548, |
|
"grad_norm": 0.0021367412991821766, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03943452483508736, |
|
"reward_std": 0.017064577899873257, |
|
"rewards/accuracy_reward": 0.03943452483508736, |
|
"step": 175 |
|
}, |
|
{ |
|
"completion_length": 661.3519515991211, |
|
"epoch": 0.9853044086773968, |
|
"grad_norm": 0.003553919028490782, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03422619169577956, |
|
"reward_std": 0.02724563330411911, |
|
"rewards/accuracy_reward": 0.03422619169577956, |
|
"step": 176 |
|
}, |
|
{ |
|
"completion_length": 638.8660793304443, |
|
"epoch": 0.9909027291812457, |
|
"grad_norm": 0.0027842505369335413, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.05803571594879031, |
|
"reward_std": 0.021928824484348297, |
|
"rewards/accuracy_reward": 0.05803571594879031, |
|
"step": 177 |
|
}, |
|
{ |
|
"completion_length": 666.0945014953613, |
|
"epoch": 0.9965010496850945, |
|
"grad_norm": 0.004953757394105196, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"reward": 0.03571428661234677, |
|
"reward_std": 0.03300916403532028, |
|
"rewards/accuracy_reward": 0.03571428661234677, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.9965010496850945, |
|
"step": 178, |
|
"total_flos": 0.0, |
|
"train_loss": 1.5679008615386834e-09, |
|
"train_runtime": 33567.1333, |
|
"train_samples_per_second": 0.596, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 178, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|