|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9983500824958752, |
|
"eval_steps": 100, |
|
"global_step": 416, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"completion_length": 579.5451488494873, |
|
"epoch": 0.0023998800059997, |
|
"grad_norm": 0.023609351366758347, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.4967548958957195, |
|
"reward_std": 0.5467220321297646, |
|
"rewards/semantic_entropy_math_reward": -1.4967548958957195, |
|
"step": 1 |
|
}, |
|
{ |
|
"completion_length": 555.8125, |
|
"epoch": 0.0047997600119994, |
|
"grad_norm": 0.023468418046832085, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2350558526813984, |
|
"reward_std": 0.47530375327914953, |
|
"rewards/semantic_entropy_math_reward": -1.2350558526813984, |
|
"step": 2 |
|
}, |
|
{ |
|
"completion_length": 593.9114570617676, |
|
"epoch": 0.0071996400179991, |
|
"grad_norm": 0.020463040098547935, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1972830928862095, |
|
"reward_std": 0.41670812107622623, |
|
"rewards/semantic_entropy_math_reward": -1.1972830928862095, |
|
"step": 3 |
|
}, |
|
{ |
|
"completion_length": 534.1493110656738, |
|
"epoch": 0.0095995200239988, |
|
"grad_norm": 0.02234843000769615, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2397769559174776, |
|
"reward_std": 0.45711124083027244, |
|
"rewards/semantic_entropy_math_reward": -1.2397769559174776, |
|
"step": 4 |
|
}, |
|
{ |
|
"completion_length": 557.0486125946045, |
|
"epoch": 0.0119994000299985, |
|
"grad_norm": 0.022175000980496407, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0718507505953312, |
|
"reward_std": 0.40449281968176365, |
|
"rewards/semantic_entropy_math_reward": -1.0718507505953312, |
|
"step": 5 |
|
}, |
|
{ |
|
"completion_length": 532.0104236602783, |
|
"epoch": 0.0143992800359982, |
|
"grad_norm": 0.023260876536369324, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2149364296346903, |
|
"reward_std": 0.5302340695634484, |
|
"rewards/semantic_entropy_math_reward": -1.2149364296346903, |
|
"step": 6 |
|
}, |
|
{ |
|
"completion_length": 585.7517433166504, |
|
"epoch": 0.0167991600419979, |
|
"grad_norm": 0.02340966835618019, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.164964143652469, |
|
"reward_std": 0.4511878960765898, |
|
"rewards/semantic_entropy_math_reward": -1.164964143652469, |
|
"step": 7 |
|
}, |
|
{ |
|
"completion_length": 610.2152843475342, |
|
"epoch": 0.0191990400479976, |
|
"grad_norm": 0.025966230779886246, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1972486525774002, |
|
"reward_std": 0.4742407090961933, |
|
"rewards/semantic_entropy_math_reward": -1.1972486525774002, |
|
"step": 8 |
|
}, |
|
{ |
|
"completion_length": 661.5781269073486, |
|
"epoch": 0.0215989200539973, |
|
"grad_norm": 0.019320230931043625, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0158138242550194, |
|
"reward_std": 0.3856792887672782, |
|
"rewards/semantic_entropy_math_reward": -1.0158138242550194, |
|
"step": 9 |
|
}, |
|
{ |
|
"completion_length": 553.5677089691162, |
|
"epoch": 0.023998800059997, |
|
"grad_norm": 0.02017727680504322, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2822860479354858, |
|
"reward_std": 0.5186142511665821, |
|
"rewards/semantic_entropy_math_reward": -1.2822860479354858, |
|
"step": 10 |
|
}, |
|
{ |
|
"completion_length": 689.8593769073486, |
|
"epoch": 0.0263986800659967, |
|
"grad_norm": 0.016234688460826874, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1487259063869715, |
|
"reward_std": 0.39036796567961574, |
|
"rewards/semantic_entropy_math_reward": -1.1487259063869715, |
|
"step": 11 |
|
}, |
|
{ |
|
"completion_length": 613.6718845367432, |
|
"epoch": 0.0287985600719964, |
|
"grad_norm": 0.01803727075457573, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0811139764264226, |
|
"reward_std": 0.4259672285988927, |
|
"rewards/semantic_entropy_math_reward": -1.0811139764264226, |
|
"step": 12 |
|
}, |
|
{ |
|
"completion_length": 620.7552165985107, |
|
"epoch": 0.0311984400779961, |
|
"grad_norm": 0.015589025802910328, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.9830317534506321, |
|
"reward_std": 0.36907480750232935, |
|
"rewards/semantic_entropy_math_reward": -0.9830317534506321, |
|
"step": 13 |
|
}, |
|
{ |
|
"completion_length": 657.2968921661377, |
|
"epoch": 0.0335983200839958, |
|
"grad_norm": 0.016835488379001617, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1987630133517087, |
|
"reward_std": 0.5184819111600518, |
|
"rewards/semantic_entropy_math_reward": -1.1987630133517087, |
|
"step": 14 |
|
}, |
|
{ |
|
"completion_length": 610.79514503479, |
|
"epoch": 0.0359982000899955, |
|
"grad_norm": 0.0162822213023901, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0888072960078716, |
|
"reward_std": 0.39932171534746885, |
|
"rewards/semantic_entropy_math_reward": -1.0888072960078716, |
|
"step": 15 |
|
}, |
|
{ |
|
"completion_length": 563.517370223999, |
|
"epoch": 0.0383980800959952, |
|
"grad_norm": 0.019534002989530563, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.09929908066988, |
|
"reward_std": 0.41265817545354366, |
|
"rewards/semantic_entropy_math_reward": -1.09929908066988, |
|
"step": 16 |
|
}, |
|
{ |
|
"completion_length": 637.720495223999, |
|
"epoch": 0.0407979601019949, |
|
"grad_norm": 0.01697668805718422, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1787171624600887, |
|
"reward_std": 0.428286274895072, |
|
"rewards/semantic_entropy_math_reward": -1.1787171624600887, |
|
"step": 17 |
|
}, |
|
{ |
|
"completion_length": 621.3993148803711, |
|
"epoch": 0.0431978401079946, |
|
"grad_norm": 0.017321443185210228, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2735732290893793, |
|
"reward_std": 0.43550457525998354, |
|
"rewards/semantic_entropy_math_reward": -1.2735732290893793, |
|
"step": 18 |
|
}, |
|
{ |
|
"completion_length": 586.9704875946045, |
|
"epoch": 0.0455977201139943, |
|
"grad_norm": 0.02127678506076336, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2064109002240002, |
|
"reward_std": 0.4663712582550943, |
|
"rewards/semantic_entropy_math_reward": -1.2064109002240002, |
|
"step": 19 |
|
}, |
|
{ |
|
"completion_length": 626.7257080078125, |
|
"epoch": 0.047997600119994, |
|
"grad_norm": 0.016206126660108566, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.152345221489668, |
|
"reward_std": 0.39662991324439645, |
|
"rewards/semantic_entropy_math_reward": -1.152345221489668, |
|
"step": 20 |
|
}, |
|
{ |
|
"completion_length": 583.7916679382324, |
|
"epoch": 0.0503974801259937, |
|
"grad_norm": 0.019242137670516968, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1798701924271882, |
|
"reward_std": 0.4554295316338539, |
|
"rewards/semantic_entropy_math_reward": -1.1798701924271882, |
|
"step": 21 |
|
}, |
|
{ |
|
"completion_length": 587.8559036254883, |
|
"epoch": 0.0527973601319934, |
|
"grad_norm": 0.020106367766857147, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.211361431516707, |
|
"reward_std": 0.5777098424732685, |
|
"rewards/semantic_entropy_math_reward": -1.211361431516707, |
|
"step": 22 |
|
}, |
|
{ |
|
"completion_length": 613.4635486602783, |
|
"epoch": 0.0551972401379931, |
|
"grad_norm": 0.019849685952067375, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.3117065764963627, |
|
"reward_std": 0.45542027335613966, |
|
"rewards/semantic_entropy_math_reward": -1.3117065764963627, |
|
"step": 23 |
|
}, |
|
{ |
|
"completion_length": 609.819450378418, |
|
"epoch": 0.0575971201439928, |
|
"grad_norm": 0.019817881286144257, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2621403858065605, |
|
"reward_std": 0.4839176032692194, |
|
"rewards/semantic_entropy_math_reward": -1.2621403858065605, |
|
"step": 24 |
|
}, |
|
{ |
|
"completion_length": 730.8524436950684, |
|
"epoch": 0.0599970001499925, |
|
"grad_norm": 0.014975810423493385, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2369180954992771, |
|
"reward_std": 0.43273931834846735, |
|
"rewards/semantic_entropy_math_reward": -1.2369180954992771, |
|
"step": 25 |
|
}, |
|
{ |
|
"completion_length": 594.288200378418, |
|
"epoch": 0.0623968801559922, |
|
"grad_norm": 0.01721040904521942, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1034620627760887, |
|
"reward_std": 0.4124642931856215, |
|
"rewards/semantic_entropy_math_reward": -1.1034620627760887, |
|
"step": 26 |
|
}, |
|
{ |
|
"completion_length": 553.0868148803711, |
|
"epoch": 0.0647967601619919, |
|
"grad_norm": 0.019415754824876785, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.130167681723833, |
|
"reward_std": 0.5143290963023901, |
|
"rewards/semantic_entropy_math_reward": -1.130167681723833, |
|
"step": 27 |
|
}, |
|
{ |
|
"completion_length": 653.2829856872559, |
|
"epoch": 0.0671966401679916, |
|
"grad_norm": 0.017851749435067177, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.989163676276803, |
|
"reward_std": 0.4082563756965101, |
|
"rewards/semantic_entropy_math_reward": -0.989163676276803, |
|
"step": 28 |
|
}, |
|
{ |
|
"completion_length": 605.6371650695801, |
|
"epoch": 0.0695965201739913, |
|
"grad_norm": 0.023129364475607872, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.4734891951084137, |
|
"reward_std": 0.5043481979519129, |
|
"rewards/semantic_entropy_math_reward": -1.4734891951084137, |
|
"step": 29 |
|
}, |
|
{ |
|
"completion_length": 654.7951488494873, |
|
"epoch": 0.071996400179991, |
|
"grad_norm": 0.020585162565112114, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.37895911000669, |
|
"reward_std": 0.4850574918091297, |
|
"rewards/semantic_entropy_math_reward": -1.37895911000669, |
|
"step": 30 |
|
}, |
|
{ |
|
"completion_length": 601.3628540039062, |
|
"epoch": 0.0743962801859907, |
|
"grad_norm": 0.02046247385442257, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0621395409107208, |
|
"reward_std": 0.4105358109809458, |
|
"rewards/semantic_entropy_math_reward": -1.0621395409107208, |
|
"step": 31 |
|
}, |
|
{ |
|
"completion_length": 656.0902843475342, |
|
"epoch": 0.0767961601919904, |
|
"grad_norm": 0.021095257252454758, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.328016122803092, |
|
"reward_std": 0.4542691232636571, |
|
"rewards/semantic_entropy_math_reward": -1.328016122803092, |
|
"step": 32 |
|
}, |
|
{ |
|
"completion_length": 585.4427185058594, |
|
"epoch": 0.0791960401979901, |
|
"grad_norm": 0.01907271519303322, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0881526842713356, |
|
"reward_std": 0.4317492740228772, |
|
"rewards/semantic_entropy_math_reward": -1.0881526842713356, |
|
"step": 33 |
|
}, |
|
{ |
|
"completion_length": 617.6753482818604, |
|
"epoch": 0.0815959202039898, |
|
"grad_norm": 0.022275349125266075, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2841743230819702, |
|
"reward_std": 0.47124351374804974, |
|
"rewards/semantic_entropy_math_reward": -1.2841743230819702, |
|
"step": 34 |
|
}, |
|
{ |
|
"completion_length": 591.3628520965576, |
|
"epoch": 0.08399580020998951, |
|
"grad_norm": 0.021941347047686577, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0643355981446803, |
|
"reward_std": 0.4094805922359228, |
|
"rewards/semantic_entropy_math_reward": -1.0643355981446803, |
|
"step": 35 |
|
}, |
|
{ |
|
"completion_length": 519.338544845581, |
|
"epoch": 0.0863956802159892, |
|
"grad_norm": 0.026442214846611023, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.9271132331341505, |
|
"reward_std": 0.4420350408181548, |
|
"rewards/semantic_entropy_math_reward": -0.9271132331341505, |
|
"step": 36 |
|
}, |
|
{ |
|
"completion_length": 628.0937614440918, |
|
"epoch": 0.0887955602219889, |
|
"grad_norm": 0.022040951997041702, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0198373273015022, |
|
"reward_std": 0.41784033365547657, |
|
"rewards/semantic_entropy_math_reward": -1.0198373273015022, |
|
"step": 37 |
|
}, |
|
{ |
|
"completion_length": 608.0572986602783, |
|
"epoch": 0.0911954402279886, |
|
"grad_norm": 0.026434265077114105, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.3598360251635313, |
|
"reward_std": 0.41981533356010914, |
|
"rewards/semantic_entropy_math_reward": -1.3598360251635313, |
|
"step": 38 |
|
}, |
|
{ |
|
"completion_length": 626.1857681274414, |
|
"epoch": 0.0935953202339883, |
|
"grad_norm": 0.042337119579315186, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.3184345178306103, |
|
"reward_std": 0.48870162200182676, |
|
"rewards/semantic_entropy_math_reward": -1.3184345178306103, |
|
"step": 39 |
|
}, |
|
{ |
|
"completion_length": 627.5381984710693, |
|
"epoch": 0.095995200239988, |
|
"grad_norm": 0.03428055718541145, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.3468139134347439, |
|
"reward_std": 0.4769498906098306, |
|
"rewards/semantic_entropy_math_reward": -1.3468139134347439, |
|
"step": 40 |
|
}, |
|
{ |
|
"completion_length": 633.9496536254883, |
|
"epoch": 0.0983950802459877, |
|
"grad_norm": 0.030928973108530045, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.132771894801408, |
|
"reward_std": 0.5021341033279896, |
|
"rewards/semantic_entropy_math_reward": -1.132771894801408, |
|
"step": 41 |
|
}, |
|
{ |
|
"completion_length": 569.9253540039062, |
|
"epoch": 0.1007949602519874, |
|
"grad_norm": 0.026956375688314438, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1975932940840721, |
|
"reward_std": 0.467874969355762, |
|
"rewards/semantic_entropy_math_reward": -1.1975932940840721, |
|
"step": 42 |
|
}, |
|
{ |
|
"completion_length": 608.6944522857666, |
|
"epoch": 0.1031948402579871, |
|
"grad_norm": 0.0328996405005455, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2175438068807125, |
|
"reward_std": 0.4515871210023761, |
|
"rewards/semantic_entropy_math_reward": -1.2175438068807125, |
|
"step": 43 |
|
}, |
|
{ |
|
"completion_length": 619.1527767181396, |
|
"epoch": 0.1055947202639868, |
|
"grad_norm": 0.03257475048303604, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2264893371611834, |
|
"reward_std": 0.4431668370962143, |
|
"rewards/semantic_entropy_math_reward": -1.2264893371611834, |
|
"step": 44 |
|
}, |
|
{ |
|
"completion_length": 562.6493148803711, |
|
"epoch": 0.1079946002699865, |
|
"grad_norm": 0.03283218294382095, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0574581907130778, |
|
"reward_std": 0.48819410149008036, |
|
"rewards/semantic_entropy_math_reward": -1.0574581907130778, |
|
"step": 45 |
|
}, |
|
{ |
|
"completion_length": 597.8142395019531, |
|
"epoch": 0.1103944802759862, |
|
"grad_norm": 0.03596136346459389, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.3064512498676777, |
|
"reward_std": 0.5052206655964255, |
|
"rewards/semantic_entropy_math_reward": -1.3064512498676777, |
|
"step": 46 |
|
}, |
|
{ |
|
"completion_length": 559.2951469421387, |
|
"epoch": 0.1127943602819859, |
|
"grad_norm": 0.04295654594898224, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2744261305779219, |
|
"reward_std": 0.4529256196692586, |
|
"rewards/semantic_entropy_math_reward": -1.2744261305779219, |
|
"step": 47 |
|
}, |
|
{ |
|
"completion_length": 574.0347270965576, |
|
"epoch": 0.1151942402879856, |
|
"grad_norm": 0.037087395787239075, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1887072566896677, |
|
"reward_std": 0.5219112485647202, |
|
"rewards/semantic_entropy_math_reward": -1.1887072566896677, |
|
"step": 48 |
|
}, |
|
{ |
|
"completion_length": 576.9548645019531, |
|
"epoch": 0.1175941202939853, |
|
"grad_norm": 0.0523596853017807, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2185893571004272, |
|
"reward_std": 0.5283103645779192, |
|
"rewards/semantic_entropy_math_reward": -1.2185893571004272, |
|
"step": 49 |
|
}, |
|
{ |
|
"completion_length": 587.7326488494873, |
|
"epoch": 0.119994000299985, |
|
"grad_norm": 0.07026118040084839, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.424642201513052, |
|
"reward_std": 0.5118397288024426, |
|
"rewards/semantic_entropy_math_reward": -1.424642201513052, |
|
"step": 50 |
|
}, |
|
{ |
|
"completion_length": 607.8958377838135, |
|
"epoch": 0.1223938803059847, |
|
"grad_norm": 0.11793287098407745, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2733404748141766, |
|
"reward_std": 0.4436110374517739, |
|
"rewards/semantic_entropy_math_reward": -1.2733404748141766, |
|
"step": 51 |
|
}, |
|
{ |
|
"completion_length": 592.35764503479, |
|
"epoch": 0.1247937603119844, |
|
"grad_norm": 0.2582877576351166, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.28734240680933, |
|
"reward_std": 0.43458423344418406, |
|
"rewards/semantic_entropy_math_reward": -1.28734240680933, |
|
"step": 52 |
|
}, |
|
{ |
|
"completion_length": 591.2170181274414, |
|
"epoch": 0.1271936403179841, |
|
"grad_norm": 0.9619891047477722, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.335528964176774, |
|
"reward_std": 0.480956160929054, |
|
"rewards/semantic_entropy_math_reward": -1.335528964176774, |
|
"step": 53 |
|
}, |
|
{ |
|
"completion_length": 601.8802165985107, |
|
"epoch": 0.1295935203239838, |
|
"grad_norm": 1.464898705482483, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0449805338867009, |
|
"reward_std": 0.3896405389532447, |
|
"rewards/semantic_entropy_math_reward": -1.0449805338867009, |
|
"step": 54 |
|
}, |
|
{ |
|
"completion_length": 745.2829933166504, |
|
"epoch": 0.1319934003299835, |
|
"grad_norm": 2.908984422683716, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.8210013713687658, |
|
"reward_std": 0.28385873371735215, |
|
"rewards/semantic_entropy_math_reward": -0.8210013713687658, |
|
"step": 55 |
|
}, |
|
{ |
|
"completion_length": 888.5954971313477, |
|
"epoch": 0.1343932803359832, |
|
"grad_norm": 2.303511381149292, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.28092469088733196, |
|
"reward_std": 0.07076533045619726, |
|
"rewards/semantic_entropy_math_reward": -0.28092469088733196, |
|
"step": 56 |
|
}, |
|
{ |
|
"completion_length": 901.1823043823242, |
|
"epoch": 0.1367931603419829, |
|
"grad_norm": 1.215346336364746, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.24043410643935204, |
|
"reward_std": 0.0535881663672626, |
|
"rewards/semantic_entropy_math_reward": -0.24043410643935204, |
|
"step": 57 |
|
}, |
|
{ |
|
"completion_length": 945.3715286254883, |
|
"epoch": 0.1391930403479826, |
|
"grad_norm": 0.24487844109535217, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": -0.04936212673783302, |
|
"reward_std": 0.005620982963591814, |
|
"rewards/semantic_entropy_math_reward": -0.04936212673783302, |
|
"step": 58 |
|
}, |
|
{ |
|
"completion_length": 958.8437576293945, |
|
"epoch": 0.1415929203539823, |
|
"grad_norm": 0.3599923551082611, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.0954090766608715, |
|
"reward_std": 0.015972360502928495, |
|
"rewards/semantic_entropy_math_reward": -0.0954090766608715, |
|
"step": 59 |
|
}, |
|
{ |
|
"completion_length": 919.9392471313477, |
|
"epoch": 0.143992800359982, |
|
"grad_norm": 0.6720292568206787, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": -0.04936212673783302, |
|
"reward_std": 0.005620982963591814, |
|
"rewards/semantic_entropy_math_reward": -0.04936212673783302, |
|
"step": 60 |
|
}, |
|
{ |
|
"completion_length": 872.0069389343262, |
|
"epoch": 0.14639268036598171, |
|
"grad_norm": 0.7852513790130615, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.24349546059966087, |
|
"reward_std": 0.03283530939370394, |
|
"rewards/semantic_entropy_math_reward": -0.24349546059966087, |
|
"step": 61 |
|
}, |
|
{ |
|
"completion_length": 885.3871574401855, |
|
"epoch": 0.1487925603719814, |
|
"grad_norm": 0.051829516887664795, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": -0.04936212673783302, |
|
"reward_std": 0.005620982963591814, |
|
"rewards/semantic_entropy_math_reward": -0.04936212673783302, |
|
"step": 62 |
|
}, |
|
{ |
|
"completion_length": 852.8298721313477, |
|
"epoch": 0.1511924403779811, |
|
"grad_norm": 0.0997152253985405, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.1974485069513321, |
|
"reward_std": 0.022483931854367256, |
|
"rewards/semantic_entropy_math_reward": -0.1974485069513321, |
|
"step": 63 |
|
}, |
|
{ |
|
"completion_length": 666.3194484710693, |
|
"epoch": 0.1535923203839808, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 64 |
|
}, |
|
{ |
|
"completion_length": 453.2257013320923, |
|
"epoch": 0.1559922003899805, |
|
"grad_norm": 0.12662464380264282, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.48036056384444237, |
|
"reward_std": 0.07513140747323632, |
|
"rewards/semantic_entropy_math_reward": -0.48036056384444237, |
|
"step": 65 |
|
}, |
|
{ |
|
"completion_length": 428.5538215637207, |
|
"epoch": 0.1583920803959802, |
|
"grad_norm": 0.11950503289699554, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.3143479097634554, |
|
"reward_std": 0.06879218481481075, |
|
"rewards/semantic_entropy_math_reward": -0.3143479097634554, |
|
"step": 66 |
|
}, |
|
{ |
|
"completion_length": 372.9236145019531, |
|
"epoch": 0.1607919604019799, |
|
"grad_norm": 1.306576132774353, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2195042371749878, |
|
"reward_std": 0.30455823382362723, |
|
"rewards/semantic_entropy_math_reward": -1.2195042371749878, |
|
"step": 67 |
|
}, |
|
{ |
|
"completion_length": 405.38021087646484, |
|
"epoch": 0.1631918404079796, |
|
"grad_norm": 3.1170105934143066, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.187372762709856, |
|
"reward_std": 0.4900816101580858, |
|
"rewards/semantic_entropy_math_reward": -1.187372762709856, |
|
"step": 68 |
|
}, |
|
{ |
|
"completion_length": 401.2066020965576, |
|
"epoch": 0.1655917204139793, |
|
"grad_norm": 1.0041663646697998, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.7445017509162426, |
|
"reward_std": 0.19908591220155358, |
|
"rewards/semantic_entropy_math_reward": -0.7445017509162426, |
|
"step": 69 |
|
}, |
|
{ |
|
"completion_length": 368.0833320617676, |
|
"epoch": 0.16799160041997901, |
|
"grad_norm": 0.262119323015213, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.25673690997064114, |
|
"reward_std": 0.0700853606685996, |
|
"rewards/semantic_entropy_math_reward": -0.25673690997064114, |
|
"step": 70 |
|
}, |
|
{ |
|
"completion_length": 295.34201431274414, |
|
"epoch": 0.1703914804259787, |
|
"grad_norm": 0.4609036445617676, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.18782146647572517, |
|
"reward_std": 0.030400068033486605, |
|
"rewards/semantic_entropy_math_reward": -0.18782146647572517, |
|
"step": 71 |
|
}, |
|
{ |
|
"completion_length": 362.2968807220459, |
|
"epoch": 0.1727913604319784, |
|
"grad_norm": 0.1498037576675415, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.20401418767869473, |
|
"reward_std": 0.0741230258718133, |
|
"rewards/semantic_entropy_math_reward": -0.20401418767869473, |
|
"step": 72 |
|
}, |
|
{ |
|
"completion_length": 398.3211898803711, |
|
"epoch": 0.1751912404379781, |
|
"grad_norm": 0.20637626945972443, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.33969277516007423, |
|
"reward_std": 0.08184323832392693, |
|
"rewards/semantic_entropy_math_reward": -0.33969277516007423, |
|
"step": 73 |
|
}, |
|
{ |
|
"completion_length": 364.69270610809326, |
|
"epoch": 0.1775911204439778, |
|
"grad_norm": 0.3202667236328125, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.5564979244954884, |
|
"reward_std": 0.17949713906273246, |
|
"rewards/semantic_entropy_math_reward": -0.5564979244954884, |
|
"step": 74 |
|
}, |
|
{ |
|
"completion_length": 398.5538263320923, |
|
"epoch": 0.1799910004499775, |
|
"grad_norm": 0.4271727502346039, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.5178813878446817, |
|
"reward_std": 0.19856971083208919, |
|
"rewards/semantic_entropy_math_reward": -0.5178813878446817, |
|
"step": 75 |
|
}, |
|
{ |
|
"completion_length": 328.8177137374878, |
|
"epoch": 0.1823908804559772, |
|
"grad_norm": 1.2274174690246582, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.730956120416522, |
|
"reward_std": 0.17488946160301566, |
|
"rewards/semantic_entropy_math_reward": -0.730956120416522, |
|
"step": 76 |
|
}, |
|
{ |
|
"completion_length": 435.6927089691162, |
|
"epoch": 0.1847907604619769, |
|
"grad_norm": 1.0755140781402588, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0360593143850565, |
|
"reward_std": 0.31746397400274873, |
|
"rewards/semantic_entropy_math_reward": -1.0360593143850565, |
|
"step": 77 |
|
}, |
|
{ |
|
"completion_length": 396.92187881469727, |
|
"epoch": 0.1871906404679766, |
|
"grad_norm": 0.49259528517723083, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0349559504538774, |
|
"reward_std": 0.30292816972360015, |
|
"rewards/semantic_entropy_math_reward": -1.0349559504538774, |
|
"step": 78 |
|
}, |
|
{ |
|
"completion_length": 303.45312213897705, |
|
"epoch": 0.18959052047397632, |
|
"grad_norm": 0.49153009057044983, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.5245293974876404, |
|
"reward_std": 0.18332215631380677, |
|
"rewards/semantic_entropy_math_reward": -0.5245293974876404, |
|
"step": 79 |
|
}, |
|
{ |
|
"completion_length": 241.1944465637207, |
|
"epoch": 0.191990400479976, |
|
"grad_norm": 1.309309482574463, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.7875159978866577, |
|
"reward_std": 0.2404517256654799, |
|
"rewards/semantic_entropy_math_reward": -0.7875159978866577, |
|
"step": 80 |
|
}, |
|
{ |
|
"completion_length": 106.01736164093018, |
|
"epoch": 0.1943902804859757, |
|
"grad_norm": 1.159730076789856, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.3929096572101116, |
|
"reward_std": 0.10203729756176472, |
|
"rewards/semantic_entropy_math_reward": -0.3929096572101116, |
|
"step": 81 |
|
}, |
|
{ |
|
"completion_length": 64.85590314865112, |
|
"epoch": 0.1967901604919754, |
|
"grad_norm": 2.622030258178711, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.7893553748726845, |
|
"reward_std": 0.1765661663375795, |
|
"rewards/semantic_entropy_math_reward": -0.7893553748726845, |
|
"step": 82 |
|
}, |
|
{ |
|
"completion_length": 233.77778005599976, |
|
"epoch": 0.1991900404979751, |
|
"grad_norm": 6.741254806518555, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.8342948034405708, |
|
"reward_std": 0.30480349250137806, |
|
"rewards/semantic_entropy_math_reward": -0.8342948034405708, |
|
"step": 83 |
|
}, |
|
{ |
|
"completion_length": 252.50521039962769, |
|
"epoch": 0.2015899205039748, |
|
"grad_norm": 6.380768299102783, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1298357415944338, |
|
"reward_std": 0.4231584039516747, |
|
"rewards/semantic_entropy_math_reward": -1.1298357415944338, |
|
"step": 84 |
|
}, |
|
{ |
|
"completion_length": 85.42708444595337, |
|
"epoch": 0.2039898005099745, |
|
"grad_norm": 1.255273461341858, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.9342016261070967, |
|
"reward_std": 0.37867429945617914, |
|
"rewards/semantic_entropy_math_reward": -0.9342016261070967, |
|
"step": 85 |
|
}, |
|
{ |
|
"completion_length": 83.58854204416275, |
|
"epoch": 0.2063896805159742, |
|
"grad_norm": 1.491475224494934, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1672791484743357, |
|
"reward_std": 0.411985841114074, |
|
"rewards/semantic_entropy_math_reward": -1.1672791484743357, |
|
"step": 86 |
|
}, |
|
{ |
|
"completion_length": 147.82638984918594, |
|
"epoch": 0.2087895605219739, |
|
"grad_norm": 1.796517252922058, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.186328262090683, |
|
"reward_std": 0.5614343388006091, |
|
"rewards/semantic_entropy_math_reward": -1.186328262090683, |
|
"step": 87 |
|
}, |
|
{ |
|
"completion_length": 266.8281271457672, |
|
"epoch": 0.2111894405279736, |
|
"grad_norm": 0.8293857574462891, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.7227916922420263, |
|
"reward_std": 0.5203290600329638, |
|
"rewards/semantic_entropy_math_reward": -0.7227916922420263, |
|
"step": 88 |
|
}, |
|
{ |
|
"completion_length": 419.9079918861389, |
|
"epoch": 0.2135893205339733, |
|
"grad_norm": 1.5567965507507324, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.6697393516078591, |
|
"reward_std": 0.3471951074898243, |
|
"rewards/semantic_entropy_math_reward": -0.6697393516078591, |
|
"step": 89 |
|
}, |
|
{ |
|
"completion_length": 848.0347213745117, |
|
"epoch": 0.215989200539973, |
|
"grad_norm": 0.7054896354675293, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.4150713551789522, |
|
"reward_std": 0.18047819379717112, |
|
"rewards/semantic_entropy_math_reward": -0.4150713551789522, |
|
"step": 90 |
|
}, |
|
{ |
|
"completion_length": 508.8593807220459, |
|
"epoch": 0.2183890805459727, |
|
"grad_norm": 3.7869491577148438, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.4939053989946842, |
|
"reward_std": 0.6185994260013103, |
|
"rewards/semantic_entropy_math_reward": -1.4939053989946842, |
|
"step": 91 |
|
}, |
|
{ |
|
"completion_length": 589.859375, |
|
"epoch": 0.2207889605519724, |
|
"grad_norm": 0.05026252567768097, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.0941957477480173, |
|
"reward_std": 0.5725127439945936, |
|
"rewards/semantic_entropy_math_reward": -1.0941957477480173, |
|
"step": 92 |
|
}, |
|
{ |
|
"completion_length": 605.2968769073486, |
|
"epoch": 0.2231888405579721, |
|
"grad_norm": 0.056097887456417084, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.2516395896673203, |
|
"reward_std": 0.6146153416484594, |
|
"rewards/semantic_entropy_math_reward": -1.2516395896673203, |
|
"step": 93 |
|
}, |
|
{ |
|
"completion_length": 539.8055553436279, |
|
"epoch": 0.2255887205639718, |
|
"grad_norm": 0.07102109491825104, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.312748797237873, |
|
"reward_std": 0.5412911372259259, |
|
"rewards/semantic_entropy_math_reward": -1.312748797237873, |
|
"step": 94 |
|
}, |
|
{ |
|
"completion_length": 502.1197929382324, |
|
"epoch": 0.2279886005699715, |
|
"grad_norm": 0.06010741740465164, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.9112661816179752, |
|
"reward_std": 0.5259749758988619, |
|
"rewards/semantic_entropy_math_reward": -0.9112661816179752, |
|
"step": 95 |
|
}, |
|
{ |
|
"completion_length": 546.5416793823242, |
|
"epoch": 0.2303884805759712, |
|
"grad_norm": 0.05066002905368805, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.128136644139886, |
|
"reward_std": 0.5719058271497488, |
|
"rewards/semantic_entropy_math_reward": -1.128136644139886, |
|
"step": 96 |
|
}, |
|
{ |
|
"completion_length": 492.8194522857666, |
|
"epoch": 0.2327883605819709, |
|
"grad_norm": 0.0559186227619648, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -1.1546523049473763, |
|
"reward_std": 0.5111725647002459, |
|
"rewards/semantic_entropy_math_reward": -1.1546523049473763, |
|
"step": 97 |
|
}, |
|
{ |
|
"completion_length": 473.44097900390625, |
|
"epoch": 0.2351882405879706, |
|
"grad_norm": 0.05957731232047081, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.9957732241600752, |
|
"reward_std": 0.5401759054511786, |
|
"rewards/semantic_entropy_math_reward": -0.9957732241600752, |
|
"step": 98 |
|
}, |
|
{ |
|
"completion_length": 489.8177146911621, |
|
"epoch": 0.2375881205939703, |
|
"grad_norm": 0.06533516943454742, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.8187971916049719, |
|
"reward_std": 0.4784417259506881, |
|
"rewards/semantic_entropy_math_reward": -0.8187971916049719, |
|
"step": 99 |
|
}, |
|
{ |
|
"completion_length": 571.5191040039062, |
|
"epoch": 0.23998800059997, |
|
"grad_norm": 0.25294971466064453, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.9735191389918327, |
|
"reward_std": 0.6345395464450121, |
|
"rewards/semantic_entropy_math_reward": -0.9735191389918327, |
|
"step": 100 |
|
}, |
|
{ |
|
"completion_length": 400.6996593475342, |
|
"epoch": 0.2423878806059697, |
|
"grad_norm": 0.3172270357608795, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.6215638350695372, |
|
"reward_std": 0.38559078332036734, |
|
"rewards/semantic_entropy_math_reward": -0.6215638350695372, |
|
"step": 101 |
|
}, |
|
{ |
|
"completion_length": 238.6892409324646, |
|
"epoch": 0.2447877606119694, |
|
"grad_norm": 1.6551789045333862, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.39494438795372844, |
|
"reward_std": 0.3323068944737315, |
|
"rewards/semantic_entropy_math_reward": -0.39494438795372844, |
|
"step": 102 |
|
}, |
|
{ |
|
"completion_length": 10.09375, |
|
"epoch": 0.2471876406179691, |
|
"grad_norm": 0.430303156375885, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.0410688160918653, |
|
"reward_std": 0.07188181672245264, |
|
"rewards/semantic_entropy_math_reward": -0.0410688160918653, |
|
"step": 103 |
|
}, |
|
{ |
|
"completion_length": 10.078125059604645, |
|
"epoch": 0.2495875206239688, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 104 |
|
}, |
|
{ |
|
"completion_length": 10.062499940395355, |
|
"epoch": 0.2519874006299685, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"completion_length": 10.045138835906982, |
|
"epoch": 0.2543872806359682, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 106 |
|
}, |
|
{ |
|
"completion_length": 10.057291746139526, |
|
"epoch": 0.2567871606419679, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 107 |
|
}, |
|
{ |
|
"completion_length": 10.008680582046509, |
|
"epoch": 0.2591870406479676, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 108 |
|
}, |
|
{ |
|
"completion_length": 10.036458432674408, |
|
"epoch": 0.2615869206539673, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 109 |
|
}, |
|
{ |
|
"completion_length": 10.032986104488373, |
|
"epoch": 0.263986800659967, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"completion_length": 10.013888835906982, |
|
"epoch": 0.2663866806659667, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 111 |
|
}, |
|
{ |
|
"completion_length": 10.052083313465118, |
|
"epoch": 0.2687865606719664, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 112 |
|
}, |
|
{ |
|
"completion_length": 10.013888895511627, |
|
"epoch": 0.2711864406779661, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 113 |
|
}, |
|
{ |
|
"completion_length": 10.020833313465118, |
|
"epoch": 0.2735863206839658, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 114 |
|
}, |
|
{ |
|
"completion_length": 10.019097208976746, |
|
"epoch": 0.2759862006899655, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"completion_length": 10.071180582046509, |
|
"epoch": 0.2783860806959652, |
|
"grad_norm": 0.2876928746700287, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": -0.011715315282344818, |
|
"reward_std": 0.010351377539336681, |
|
"rewards/semantic_entropy_math_reward": -0.011715315282344818, |
|
"step": 116 |
|
}, |
|
{ |
|
"completion_length": 10.093750059604645, |
|
"epoch": 0.2807859607019649, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 117 |
|
}, |
|
{ |
|
"completion_length": 10.124999940395355, |
|
"epoch": 0.2831858407079646, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 118 |
|
}, |
|
{ |
|
"completion_length": 10.423611223697662, |
|
"epoch": 0.2855857207139643, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 119 |
|
}, |
|
{ |
|
"completion_length": 10.47569453716278, |
|
"epoch": 0.287985600719964, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"completion_length": 10.59375011920929, |
|
"epoch": 0.2903854807259637, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 121 |
|
}, |
|
{ |
|
"completion_length": 10.640625178813934, |
|
"epoch": 0.29278536073196343, |
|
"grad_norm": 0.23409874737262726, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": -0.005975749809294939, |
|
"reward_std": 0.014421098865568638, |
|
"rewards/semantic_entropy_math_reward": -0.005975749809294939, |
|
"step": 122 |
|
}, |
|
{ |
|
"completion_length": 10.890625178813934, |
|
"epoch": 0.2951852407379631, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 123 |
|
}, |
|
{ |
|
"completion_length": 10.881944596767426, |
|
"epoch": 0.2975851207439628, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 124 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.2999850007499625, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"completion_length": 10.973958432674408, |
|
"epoch": 0.3023848807559622, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 126 |
|
}, |
|
{ |
|
"completion_length": 10.987847208976746, |
|
"epoch": 0.3047847607619619, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 127 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.3071846407679616, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 128 |
|
}, |
|
{ |
|
"completion_length": 10.984375, |
|
"epoch": 0.3095845207739613, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 129 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.311984400779961, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"completion_length": 10.993055582046509, |
|
"epoch": 0.3143842807859607, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 131 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3167841607919604, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 132 |
|
}, |
|
{ |
|
"completion_length": 11.01909726858139, |
|
"epoch": 0.3191840407979601, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 133 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3215839208039598, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 134 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3239838008099595, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.3263836808159592, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 136 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.3287835608219589, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 137 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3311834408279586, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 138 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.3335833208339583, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 139 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.33598320083995803, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3383830808459577, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 141 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.3407829608519574, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 142 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3431828408579571, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 143 |
|
}, |
|
{ |
|
"completion_length": 11.017361164093018, |
|
"epoch": 0.3455827208639568, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 144 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.3479826008699565, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.3503824808759562, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 146 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3527823608819559, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 147 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3551822408879556, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 148 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3575821208939553, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 149 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.359982000899955, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3623818809059547, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 151 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3647817609119544, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 152 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.3671816409179541, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 153 |
|
}, |
|
{ |
|
"completion_length": 10.984375, |
|
"epoch": 0.3695815209239538, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 154 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3719814009299535, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.3743812809359532, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"completion_length": 10.993055582046509, |
|
"epoch": 0.3767811609419529, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 157 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.37918104094795263, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 158 |
|
}, |
|
{ |
|
"completion_length": 11.013888895511627, |
|
"epoch": 0.3815809209539523, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 159 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.383980800959952, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3863806809659517, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 161 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3887805609719514, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 162 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.3911804409779511, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 163 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.3935803209839508, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 164 |
|
}, |
|
{ |
|
"completion_length": 11.024305582046509, |
|
"epoch": 0.3959802009899505, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.3983800809959502, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 166 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.4007799610019499, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 167 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4031798410079496, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 168 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.4055797210139493, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 169 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.407979601019949, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4103794810259487, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 171 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4127793610319484, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 172 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.4151792410379481, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 173 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4175791210439478, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 174 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4199790010499475, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4223788810559472, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 176 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.4247787610619469, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 177 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4271786410679466, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 178 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.4295785210739463, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 179 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.431978401079946, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.4343782810859457, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 181 |
|
}, |
|
{ |
|
"completion_length": 10.980902791023254, |
|
"epoch": 0.4367781610919454, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 182 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4391780410979451, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 183 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4415779211039448, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 184 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4439778011099445, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 185 |
|
}, |
|
{ |
|
"completion_length": 10.986111104488373, |
|
"epoch": 0.4463776811159442, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.4487775611219439, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 187 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.4511774411279436, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 188 |
|
}, |
|
{ |
|
"completion_length": 10.984375, |
|
"epoch": 0.4535773211339433, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 189 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.455977201139943, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.4583770811459427, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 191 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4607769611519424, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 192 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4631768411579421, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 193 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4655767211639418, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 194 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4679766011699415, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 195 |
|
}, |
|
{ |
|
"completion_length": 10.989583313465118, |
|
"epoch": 0.4703764811759412, |
|
"grad_norm": 0.07069958746433258, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.005975749809294939, |
|
"reward_std": 0.014421098865568638, |
|
"rewards/semantic_entropy_math_reward": -0.005975749809294939, |
|
"step": 196 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.4727763611819409, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 197 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4751762411879406, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 198 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.47757612119394033, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 199 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.47997600119994, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.4823758812059397, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 201 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.4847757612119394, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 202 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4871756412179391, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 203 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4895755212239388, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 204 |
|
}, |
|
{ |
|
"completion_length": 10.987847208976746, |
|
"epoch": 0.4919754012299385, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 205 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4943752812359382, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 206 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4967751612419379, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 207 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.4991750412479376, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 208 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.5015749212539373, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 209 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.503974801259937, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"completion_length": 10.980902791023254, |
|
"epoch": 0.5063746812659367, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 211 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5087745612719364, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 212 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5111744412779361, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 213 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5135743212839358, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 214 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.5159742012899355, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 215 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5183740812959352, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 216 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5207739613019349, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 217 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.5231738413079347, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 218 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.5255737213139343, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 219 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.527973601319934, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"completion_length": 11.020833313465118, |
|
"epoch": 0.5303734813259336, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 221 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5327733613319334, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 222 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.5351732413379331, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 223 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5375731213439328, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 224 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5399730013499325, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5423728813559322, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 226 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5447727613619319, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 227 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5471726413679316, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 228 |
|
}, |
|
{ |
|
"completion_length": 10.98784726858139, |
|
"epoch": 0.5495725213739313, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 229 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.551972401379931, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.5543722813859308, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 231 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5567721613919304, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 232 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5591720413979301, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 233 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5615719214039298, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 234 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5639718014099295, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 235 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.5663716814159292, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 236 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.5687715614219289, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 237 |
|
}, |
|
{ |
|
"completion_length": 10.993055582046509, |
|
"epoch": 0.5711714414279286, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 238 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5735713214339283, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 239 |
|
}, |
|
{ |
|
"completion_length": 10.991319477558136, |
|
"epoch": 0.575971201439928, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5783710814459277, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 241 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5807709614519274, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 242 |
|
}, |
|
{ |
|
"completion_length": 11.022569477558136, |
|
"epoch": 0.5831708414579271, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 243 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5855707214639269, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 244 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5879706014699265, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 245 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.5903704814759262, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 246 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.5927703614819259, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 247 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.5951702414879256, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 248 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.5975701214939253, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 249 |
|
}, |
|
{ |
|
"completion_length": 10.991319477558136, |
|
"epoch": 0.599970001499925, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6023698815059247, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 251 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6047697615119244, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 252 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6071696415179241, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 253 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.6095695215239239, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 254 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6119694015299235, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 255 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6143692815359232, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 256 |
|
}, |
|
{ |
|
"completion_length": 10.987847208976746, |
|
"epoch": 0.6167691615419229, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 257 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.6191690415479226, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 258 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.6215689215539223, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 259 |
|
}, |
|
{ |
|
"completion_length": 11.012152791023254, |
|
"epoch": 0.623968801559922, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.6263686815659217, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 261 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.6287685615719214, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 262 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.6311684415779211, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 263 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6335683215839208, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 264 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.6359682015899205, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 265 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6383680815959202, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 266 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.64076796160192, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 267 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.6431678416079196, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 268 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6455677216139193, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 269 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.647967601619919, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.6503674816259187, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 271 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6527673616319184, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 272 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6551672416379181, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 273 |
|
}, |
|
{ |
|
"completion_length": 10.984375059604645, |
|
"epoch": 0.6575671216439178, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 274 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6599670016499175, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6623668816559172, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 276 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.664766761661917, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 277 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.6671666416679166, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 278 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6695665216739163, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 279 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6719664016799161, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.6743662816859157, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 281 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.6767661616919154, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 282 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6791660416979151, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 283 |
|
}, |
|
{ |
|
"completion_length": 10.980902791023254, |
|
"epoch": 0.6815659217039148, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 284 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6839658017099145, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 285 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.6863656817159142, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 286 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.6887655617219139, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 287 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6911654417279136, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 288 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.6935653217339133, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 289 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.695965201739913, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.6983650817459127, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 291 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7007649617519124, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 292 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.703164841757912, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 293 |
|
}, |
|
{ |
|
"completion_length": 10.987847208976746, |
|
"epoch": 0.7055647217639118, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 294 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.7079646017699115, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 295 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7103644817759112, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 296 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7127643617819109, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 297 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7151642417879106, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 298 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.7175641217939103, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 299 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.71996400179991, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7223638818059097, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 301 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7247637618119094, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 302 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7271636418179092, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 303 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7295635218239088, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 304 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.7319634018299085, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 305 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7343632818359082, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 306 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7367631618419079, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 307 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.7391630418479076, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 308 |
|
}, |
|
{ |
|
"completion_length": 10.993055582046509, |
|
"epoch": 0.7415629218539073, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 309 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.743962801859907, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7463626818659067, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 311 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7487625618719064, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7511624418779062, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 313 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.7535623218839058, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 314 |
|
}, |
|
{ |
|
"completion_length": 10.993055582046509, |
|
"epoch": 0.7559622018899055, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 315 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.7583620818959053, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 316 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.7607619619019049, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 317 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7631618419079046, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 318 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.7655617219139043, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 319 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.767961601919904, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"completion_length": 10.993055582046509, |
|
"epoch": 0.7703614819259037, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 321 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7727613619319034, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 322 |
|
}, |
|
{ |
|
"completion_length": 10.979166686534882, |
|
"epoch": 0.7751612419379031, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 323 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7775611219439028, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 324 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.7799610019499025, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"completion_length": 10.991319477558136, |
|
"epoch": 0.7823608819559023, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 326 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7847607619619019, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 327 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7871606419679016, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 328 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7895605219739013, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 329 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.791960401979901, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"completion_length": 10.993055582046509, |
|
"epoch": 0.7943602819859007, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 331 |
|
}, |
|
{ |
|
"completion_length": 10.98784726858139, |
|
"epoch": 0.7967601619919004, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 332 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.7991600419979001, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 333 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8015599220038998, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 334 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.8039598020098995, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 335 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.8063596820158992, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 336 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.8087595620218989, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 337 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.8111594420278986, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 338 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.8135593220338984, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 339 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.815959202039898, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8183590820458977, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 341 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8207589620518974, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 342 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8231588420578971, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 343 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8255587220638968, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 344 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.8279586020698965, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 345 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.8303584820758962, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 346 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8327583620818959, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 347 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8351582420878956, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 348 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8375581220938954, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 349 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.839958002099895, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"completion_length": 10.979166686534882, |
|
"epoch": 0.8423578821058947, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 351 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8447577621118944, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 352 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8471576421178941, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 353 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8495575221238938, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 354 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8519574021298935, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 355 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8543572821358932, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 356 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8567571621418929, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 357 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8591570421478926, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 358 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.8615569221538923, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 359 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.863956802159892, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8663566821658917, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 361 |
|
}, |
|
{ |
|
"completion_length": 11.022569477558136, |
|
"epoch": 0.8687565621718915, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 362 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8711564421778911, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 363 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.8735563221838908, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 364 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.8759562021898905, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 365 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8783560821958902, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 366 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8807559622018899, |
|
"grad_norm": 0.02790955826640129, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0, |
|
"reward": -0.005975749343633652, |
|
"reward_std": 0.014421098865568638, |
|
"rewards/semantic_entropy_math_reward": -0.005975749343633652, |
|
"step": 367 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8831558422078896, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 368 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8855557222138893, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 369 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.887955602219889, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8903554822258887, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 371 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8927553622318884, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 372 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.8951552422378881, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 373 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.8975551222438878, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 374 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.8999550022498876, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9023548822558872, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 376 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.9047547622618869, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 377 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9071546422678866, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 378 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9095545222738863, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 379 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.911954402279886, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9143542822858857, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 381 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.9167541622918854, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 382 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9191540422978851, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 383 |
|
}, |
|
{ |
|
"completion_length": 10.993055582046509, |
|
"epoch": 0.9215539223038848, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 384 |
|
}, |
|
{ |
|
"completion_length": 10.979166686534882, |
|
"epoch": 0.9239538023098846, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 385 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.9263536823158842, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 386 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.9287535623218839, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 387 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.9311534423278836, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 388 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.9335533223338833, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 389 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.935953202339883, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.9383530823458827, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 391 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9407529623518824, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 392 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.9431528423578821, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 393 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9455527223638818, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 394 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.9479526023698815, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 395 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9503524823758812, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 396 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.9527523623818809, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 397 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9551522423878807, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 398 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9575521223938803, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 399 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.95995200239988, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9623518824058797, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 401 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.9647517624118794, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 402 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.9671516424178791, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 403 |
|
}, |
|
{ |
|
"completion_length": 10.996527791023254, |
|
"epoch": 0.9695515224238788, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 404 |
|
}, |
|
{ |
|
"completion_length": 10.994791686534882, |
|
"epoch": 0.9719514024298785, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 405 |
|
}, |
|
{ |
|
"completion_length": 11.038194477558136, |
|
"epoch": 0.9743512824358782, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 406 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9767511624418779, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 407 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9791510424478777, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 408 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.9815509224538773, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 409 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.983950802459877, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"completion_length": 10.998263895511627, |
|
"epoch": 0.9863506824658768, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 411 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9887505624718764, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 412 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9911504424778761, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 413 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9935503224838758, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 414 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9959502024898755, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 415 |
|
}, |
|
{ |
|
"completion_length": 11.0, |
|
"epoch": 0.9983500824958752, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/semantic_entropy_math_reward": 0.0, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.9983500824958752, |
|
"step": 416, |
|
"total_flos": 0.0, |
|
"train_loss": -8.8036603985719e-09, |
|
"train_runtime": 28166.6905, |
|
"train_samples_per_second": 0.71, |
|
"train_steps_per_second": 0.015 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 416, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|