Haitao999's picture
Model save
9899458 verified
raw
history blame
123 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9983500824958752,
"eval_steps": 100,
"global_step": 416,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 579.5451488494873,
"epoch": 0.0023998800059997,
"grad_norm": 0.023609351366758347,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.4967548958957195,
"reward_std": 0.5467220321297646,
"rewards/semantic_entropy_math_reward": -1.4967548958957195,
"step": 1
},
{
"completion_length": 555.8125,
"epoch": 0.0047997600119994,
"grad_norm": 0.023468418046832085,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2350558526813984,
"reward_std": 0.47530375327914953,
"rewards/semantic_entropy_math_reward": -1.2350558526813984,
"step": 2
},
{
"completion_length": 593.9114570617676,
"epoch": 0.0071996400179991,
"grad_norm": 0.020463040098547935,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1972830928862095,
"reward_std": 0.41670812107622623,
"rewards/semantic_entropy_math_reward": -1.1972830928862095,
"step": 3
},
{
"completion_length": 534.1493110656738,
"epoch": 0.0095995200239988,
"grad_norm": 0.02234843000769615,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2397769559174776,
"reward_std": 0.45711124083027244,
"rewards/semantic_entropy_math_reward": -1.2397769559174776,
"step": 4
},
{
"completion_length": 557.0486125946045,
"epoch": 0.0119994000299985,
"grad_norm": 0.022175000980496407,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0718507505953312,
"reward_std": 0.40449281968176365,
"rewards/semantic_entropy_math_reward": -1.0718507505953312,
"step": 5
},
{
"completion_length": 532.0104236602783,
"epoch": 0.0143992800359982,
"grad_norm": 0.023260876536369324,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2149364296346903,
"reward_std": 0.5302340695634484,
"rewards/semantic_entropy_math_reward": -1.2149364296346903,
"step": 6
},
{
"completion_length": 585.7517433166504,
"epoch": 0.0167991600419979,
"grad_norm": 0.02340966835618019,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.164964143652469,
"reward_std": 0.4511878960765898,
"rewards/semantic_entropy_math_reward": -1.164964143652469,
"step": 7
},
{
"completion_length": 610.2152843475342,
"epoch": 0.0191990400479976,
"grad_norm": 0.025966230779886246,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1972486525774002,
"reward_std": 0.4742407090961933,
"rewards/semantic_entropy_math_reward": -1.1972486525774002,
"step": 8
},
{
"completion_length": 661.5781269073486,
"epoch": 0.0215989200539973,
"grad_norm": 0.019320230931043625,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0158138242550194,
"reward_std": 0.3856792887672782,
"rewards/semantic_entropy_math_reward": -1.0158138242550194,
"step": 9
},
{
"completion_length": 553.5677089691162,
"epoch": 0.023998800059997,
"grad_norm": 0.02017727680504322,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2822860479354858,
"reward_std": 0.5186142511665821,
"rewards/semantic_entropy_math_reward": -1.2822860479354858,
"step": 10
},
{
"completion_length": 689.8593769073486,
"epoch": 0.0263986800659967,
"grad_norm": 0.016234688460826874,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1487259063869715,
"reward_std": 0.39036796567961574,
"rewards/semantic_entropy_math_reward": -1.1487259063869715,
"step": 11
},
{
"completion_length": 613.6718845367432,
"epoch": 0.0287985600719964,
"grad_norm": 0.01803727075457573,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0811139764264226,
"reward_std": 0.4259672285988927,
"rewards/semantic_entropy_math_reward": -1.0811139764264226,
"step": 12
},
{
"completion_length": 620.7552165985107,
"epoch": 0.0311984400779961,
"grad_norm": 0.015589025802910328,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.9830317534506321,
"reward_std": 0.36907480750232935,
"rewards/semantic_entropy_math_reward": -0.9830317534506321,
"step": 13
},
{
"completion_length": 657.2968921661377,
"epoch": 0.0335983200839958,
"grad_norm": 0.016835488379001617,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1987630133517087,
"reward_std": 0.5184819111600518,
"rewards/semantic_entropy_math_reward": -1.1987630133517087,
"step": 14
},
{
"completion_length": 610.79514503479,
"epoch": 0.0359982000899955,
"grad_norm": 0.0162822213023901,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0888072960078716,
"reward_std": 0.39932171534746885,
"rewards/semantic_entropy_math_reward": -1.0888072960078716,
"step": 15
},
{
"completion_length": 563.517370223999,
"epoch": 0.0383980800959952,
"grad_norm": 0.019534002989530563,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.09929908066988,
"reward_std": 0.41265817545354366,
"rewards/semantic_entropy_math_reward": -1.09929908066988,
"step": 16
},
{
"completion_length": 637.720495223999,
"epoch": 0.0407979601019949,
"grad_norm": 0.01697668805718422,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1787171624600887,
"reward_std": 0.428286274895072,
"rewards/semantic_entropy_math_reward": -1.1787171624600887,
"step": 17
},
{
"completion_length": 621.3993148803711,
"epoch": 0.0431978401079946,
"grad_norm": 0.017321443185210228,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2735732290893793,
"reward_std": 0.43550457525998354,
"rewards/semantic_entropy_math_reward": -1.2735732290893793,
"step": 18
},
{
"completion_length": 586.9704875946045,
"epoch": 0.0455977201139943,
"grad_norm": 0.02127678506076336,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2064109002240002,
"reward_std": 0.4663712582550943,
"rewards/semantic_entropy_math_reward": -1.2064109002240002,
"step": 19
},
{
"completion_length": 626.7257080078125,
"epoch": 0.047997600119994,
"grad_norm": 0.016206126660108566,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.152345221489668,
"reward_std": 0.39662991324439645,
"rewards/semantic_entropy_math_reward": -1.152345221489668,
"step": 20
},
{
"completion_length": 583.7916679382324,
"epoch": 0.0503974801259937,
"grad_norm": 0.019242137670516968,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1798701924271882,
"reward_std": 0.4554295316338539,
"rewards/semantic_entropy_math_reward": -1.1798701924271882,
"step": 21
},
{
"completion_length": 587.8559036254883,
"epoch": 0.0527973601319934,
"grad_norm": 0.020106367766857147,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.211361431516707,
"reward_std": 0.5777098424732685,
"rewards/semantic_entropy_math_reward": -1.211361431516707,
"step": 22
},
{
"completion_length": 613.4635486602783,
"epoch": 0.0551972401379931,
"grad_norm": 0.019849685952067375,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.3117065764963627,
"reward_std": 0.45542027335613966,
"rewards/semantic_entropy_math_reward": -1.3117065764963627,
"step": 23
},
{
"completion_length": 609.819450378418,
"epoch": 0.0575971201439928,
"grad_norm": 0.019817881286144257,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2621403858065605,
"reward_std": 0.4839176032692194,
"rewards/semantic_entropy_math_reward": -1.2621403858065605,
"step": 24
},
{
"completion_length": 730.8524436950684,
"epoch": 0.0599970001499925,
"grad_norm": 0.014975810423493385,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2369180954992771,
"reward_std": 0.43273931834846735,
"rewards/semantic_entropy_math_reward": -1.2369180954992771,
"step": 25
},
{
"completion_length": 594.288200378418,
"epoch": 0.0623968801559922,
"grad_norm": 0.01721040904521942,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1034620627760887,
"reward_std": 0.4124642931856215,
"rewards/semantic_entropy_math_reward": -1.1034620627760887,
"step": 26
},
{
"completion_length": 553.0868148803711,
"epoch": 0.0647967601619919,
"grad_norm": 0.019415754824876785,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.130167681723833,
"reward_std": 0.5143290963023901,
"rewards/semantic_entropy_math_reward": -1.130167681723833,
"step": 27
},
{
"completion_length": 653.2829856872559,
"epoch": 0.0671966401679916,
"grad_norm": 0.017851749435067177,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.989163676276803,
"reward_std": 0.4082563756965101,
"rewards/semantic_entropy_math_reward": -0.989163676276803,
"step": 28
},
{
"completion_length": 605.6371650695801,
"epoch": 0.0695965201739913,
"grad_norm": 0.023129364475607872,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.4734891951084137,
"reward_std": 0.5043481979519129,
"rewards/semantic_entropy_math_reward": -1.4734891951084137,
"step": 29
},
{
"completion_length": 654.7951488494873,
"epoch": 0.071996400179991,
"grad_norm": 0.020585162565112114,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.37895911000669,
"reward_std": 0.4850574918091297,
"rewards/semantic_entropy_math_reward": -1.37895911000669,
"step": 30
},
{
"completion_length": 601.3628540039062,
"epoch": 0.0743962801859907,
"grad_norm": 0.02046247385442257,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0621395409107208,
"reward_std": 0.4105358109809458,
"rewards/semantic_entropy_math_reward": -1.0621395409107208,
"step": 31
},
{
"completion_length": 656.0902843475342,
"epoch": 0.0767961601919904,
"grad_norm": 0.021095257252454758,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.328016122803092,
"reward_std": 0.4542691232636571,
"rewards/semantic_entropy_math_reward": -1.328016122803092,
"step": 32
},
{
"completion_length": 585.4427185058594,
"epoch": 0.0791960401979901,
"grad_norm": 0.01907271519303322,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0881526842713356,
"reward_std": 0.4317492740228772,
"rewards/semantic_entropy_math_reward": -1.0881526842713356,
"step": 33
},
{
"completion_length": 617.6753482818604,
"epoch": 0.0815959202039898,
"grad_norm": 0.022275349125266075,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2841743230819702,
"reward_std": 0.47124351374804974,
"rewards/semantic_entropy_math_reward": -1.2841743230819702,
"step": 34
},
{
"completion_length": 591.3628520965576,
"epoch": 0.08399580020998951,
"grad_norm": 0.021941347047686577,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0643355981446803,
"reward_std": 0.4094805922359228,
"rewards/semantic_entropy_math_reward": -1.0643355981446803,
"step": 35
},
{
"completion_length": 519.338544845581,
"epoch": 0.0863956802159892,
"grad_norm": 0.026442214846611023,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.9271132331341505,
"reward_std": 0.4420350408181548,
"rewards/semantic_entropy_math_reward": -0.9271132331341505,
"step": 36
},
{
"completion_length": 628.0937614440918,
"epoch": 0.0887955602219889,
"grad_norm": 0.022040951997041702,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0198373273015022,
"reward_std": 0.41784033365547657,
"rewards/semantic_entropy_math_reward": -1.0198373273015022,
"step": 37
},
{
"completion_length": 608.0572986602783,
"epoch": 0.0911954402279886,
"grad_norm": 0.026434265077114105,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.3598360251635313,
"reward_std": 0.41981533356010914,
"rewards/semantic_entropy_math_reward": -1.3598360251635313,
"step": 38
},
{
"completion_length": 626.1857681274414,
"epoch": 0.0935953202339883,
"grad_norm": 0.042337119579315186,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.3184345178306103,
"reward_std": 0.48870162200182676,
"rewards/semantic_entropy_math_reward": -1.3184345178306103,
"step": 39
},
{
"completion_length": 627.5381984710693,
"epoch": 0.095995200239988,
"grad_norm": 0.03428055718541145,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.3468139134347439,
"reward_std": 0.4769498906098306,
"rewards/semantic_entropy_math_reward": -1.3468139134347439,
"step": 40
},
{
"completion_length": 633.9496536254883,
"epoch": 0.0983950802459877,
"grad_norm": 0.030928973108530045,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.132771894801408,
"reward_std": 0.5021341033279896,
"rewards/semantic_entropy_math_reward": -1.132771894801408,
"step": 41
},
{
"completion_length": 569.9253540039062,
"epoch": 0.1007949602519874,
"grad_norm": 0.026956375688314438,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1975932940840721,
"reward_std": 0.467874969355762,
"rewards/semantic_entropy_math_reward": -1.1975932940840721,
"step": 42
},
{
"completion_length": 608.6944522857666,
"epoch": 0.1031948402579871,
"grad_norm": 0.0328996405005455,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2175438068807125,
"reward_std": 0.4515871210023761,
"rewards/semantic_entropy_math_reward": -1.2175438068807125,
"step": 43
},
{
"completion_length": 619.1527767181396,
"epoch": 0.1055947202639868,
"grad_norm": 0.03257475048303604,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2264893371611834,
"reward_std": 0.4431668370962143,
"rewards/semantic_entropy_math_reward": -1.2264893371611834,
"step": 44
},
{
"completion_length": 562.6493148803711,
"epoch": 0.1079946002699865,
"grad_norm": 0.03283218294382095,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0574581907130778,
"reward_std": 0.48819410149008036,
"rewards/semantic_entropy_math_reward": -1.0574581907130778,
"step": 45
},
{
"completion_length": 597.8142395019531,
"epoch": 0.1103944802759862,
"grad_norm": 0.03596136346459389,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.3064512498676777,
"reward_std": 0.5052206655964255,
"rewards/semantic_entropy_math_reward": -1.3064512498676777,
"step": 46
},
{
"completion_length": 559.2951469421387,
"epoch": 0.1127943602819859,
"grad_norm": 0.04295654594898224,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2744261305779219,
"reward_std": 0.4529256196692586,
"rewards/semantic_entropy_math_reward": -1.2744261305779219,
"step": 47
},
{
"completion_length": 574.0347270965576,
"epoch": 0.1151942402879856,
"grad_norm": 0.037087395787239075,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1887072566896677,
"reward_std": 0.5219112485647202,
"rewards/semantic_entropy_math_reward": -1.1887072566896677,
"step": 48
},
{
"completion_length": 576.9548645019531,
"epoch": 0.1175941202939853,
"grad_norm": 0.0523596853017807,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2185893571004272,
"reward_std": 0.5283103645779192,
"rewards/semantic_entropy_math_reward": -1.2185893571004272,
"step": 49
},
{
"completion_length": 587.7326488494873,
"epoch": 0.119994000299985,
"grad_norm": 0.07026118040084839,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.424642201513052,
"reward_std": 0.5118397288024426,
"rewards/semantic_entropy_math_reward": -1.424642201513052,
"step": 50
},
{
"completion_length": 607.8958377838135,
"epoch": 0.1223938803059847,
"grad_norm": 0.11793287098407745,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2733404748141766,
"reward_std": 0.4436110374517739,
"rewards/semantic_entropy_math_reward": -1.2733404748141766,
"step": 51
},
{
"completion_length": 592.35764503479,
"epoch": 0.1247937603119844,
"grad_norm": 0.2582877576351166,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.28734240680933,
"reward_std": 0.43458423344418406,
"rewards/semantic_entropy_math_reward": -1.28734240680933,
"step": 52
},
{
"completion_length": 591.2170181274414,
"epoch": 0.1271936403179841,
"grad_norm": 0.9619891047477722,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.335528964176774,
"reward_std": 0.480956160929054,
"rewards/semantic_entropy_math_reward": -1.335528964176774,
"step": 53
},
{
"completion_length": 601.8802165985107,
"epoch": 0.1295935203239838,
"grad_norm": 1.464898705482483,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0449805338867009,
"reward_std": 0.3896405389532447,
"rewards/semantic_entropy_math_reward": -1.0449805338867009,
"step": 54
},
{
"completion_length": 745.2829933166504,
"epoch": 0.1319934003299835,
"grad_norm": 2.908984422683716,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.8210013713687658,
"reward_std": 0.28385873371735215,
"rewards/semantic_entropy_math_reward": -0.8210013713687658,
"step": 55
},
{
"completion_length": 888.5954971313477,
"epoch": 0.1343932803359832,
"grad_norm": 2.303511381149292,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.28092469088733196,
"reward_std": 0.07076533045619726,
"rewards/semantic_entropy_math_reward": -0.28092469088733196,
"step": 56
},
{
"completion_length": 901.1823043823242,
"epoch": 0.1367931603419829,
"grad_norm": 1.215346336364746,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.24043410643935204,
"reward_std": 0.0535881663672626,
"rewards/semantic_entropy_math_reward": -0.24043410643935204,
"step": 57
},
{
"completion_length": 945.3715286254883,
"epoch": 0.1391930403479826,
"grad_norm": 0.24487844109535217,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": -0.04936212673783302,
"reward_std": 0.005620982963591814,
"rewards/semantic_entropy_math_reward": -0.04936212673783302,
"step": 58
},
{
"completion_length": 958.8437576293945,
"epoch": 0.1415929203539823,
"grad_norm": 0.3599923551082611,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.0954090766608715,
"reward_std": 0.015972360502928495,
"rewards/semantic_entropy_math_reward": -0.0954090766608715,
"step": 59
},
{
"completion_length": 919.9392471313477,
"epoch": 0.143992800359982,
"grad_norm": 0.6720292568206787,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": -0.04936212673783302,
"reward_std": 0.005620982963591814,
"rewards/semantic_entropy_math_reward": -0.04936212673783302,
"step": 60
},
{
"completion_length": 872.0069389343262,
"epoch": 0.14639268036598171,
"grad_norm": 0.7852513790130615,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.24349546059966087,
"reward_std": 0.03283530939370394,
"rewards/semantic_entropy_math_reward": -0.24349546059966087,
"step": 61
},
{
"completion_length": 885.3871574401855,
"epoch": 0.1487925603719814,
"grad_norm": 0.051829516887664795,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": -0.04936212673783302,
"reward_std": 0.005620982963591814,
"rewards/semantic_entropy_math_reward": -0.04936212673783302,
"step": 62
},
{
"completion_length": 852.8298721313477,
"epoch": 0.1511924403779811,
"grad_norm": 0.0997152253985405,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.1974485069513321,
"reward_std": 0.022483931854367256,
"rewards/semantic_entropy_math_reward": -0.1974485069513321,
"step": 63
},
{
"completion_length": 666.3194484710693,
"epoch": 0.1535923203839808,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 64
},
{
"completion_length": 453.2257013320923,
"epoch": 0.1559922003899805,
"grad_norm": 0.12662464380264282,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.48036056384444237,
"reward_std": 0.07513140747323632,
"rewards/semantic_entropy_math_reward": -0.48036056384444237,
"step": 65
},
{
"completion_length": 428.5538215637207,
"epoch": 0.1583920803959802,
"grad_norm": 0.11950503289699554,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.3143479097634554,
"reward_std": 0.06879218481481075,
"rewards/semantic_entropy_math_reward": -0.3143479097634554,
"step": 66
},
{
"completion_length": 372.9236145019531,
"epoch": 0.1607919604019799,
"grad_norm": 1.306576132774353,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2195042371749878,
"reward_std": 0.30455823382362723,
"rewards/semantic_entropy_math_reward": -1.2195042371749878,
"step": 67
},
{
"completion_length": 405.38021087646484,
"epoch": 0.1631918404079796,
"grad_norm": 3.1170105934143066,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.187372762709856,
"reward_std": 0.4900816101580858,
"rewards/semantic_entropy_math_reward": -1.187372762709856,
"step": 68
},
{
"completion_length": 401.2066020965576,
"epoch": 0.1655917204139793,
"grad_norm": 1.0041663646697998,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.7445017509162426,
"reward_std": 0.19908591220155358,
"rewards/semantic_entropy_math_reward": -0.7445017509162426,
"step": 69
},
{
"completion_length": 368.0833320617676,
"epoch": 0.16799160041997901,
"grad_norm": 0.262119323015213,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.25673690997064114,
"reward_std": 0.0700853606685996,
"rewards/semantic_entropy_math_reward": -0.25673690997064114,
"step": 70
},
{
"completion_length": 295.34201431274414,
"epoch": 0.1703914804259787,
"grad_norm": 0.4609036445617676,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.18782146647572517,
"reward_std": 0.030400068033486605,
"rewards/semantic_entropy_math_reward": -0.18782146647572517,
"step": 71
},
{
"completion_length": 362.2968807220459,
"epoch": 0.1727913604319784,
"grad_norm": 0.1498037576675415,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.20401418767869473,
"reward_std": 0.0741230258718133,
"rewards/semantic_entropy_math_reward": -0.20401418767869473,
"step": 72
},
{
"completion_length": 398.3211898803711,
"epoch": 0.1751912404379781,
"grad_norm": 0.20637626945972443,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.33969277516007423,
"reward_std": 0.08184323832392693,
"rewards/semantic_entropy_math_reward": -0.33969277516007423,
"step": 73
},
{
"completion_length": 364.69270610809326,
"epoch": 0.1775911204439778,
"grad_norm": 0.3202667236328125,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.5564979244954884,
"reward_std": 0.17949713906273246,
"rewards/semantic_entropy_math_reward": -0.5564979244954884,
"step": 74
},
{
"completion_length": 398.5538263320923,
"epoch": 0.1799910004499775,
"grad_norm": 0.4271727502346039,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.5178813878446817,
"reward_std": 0.19856971083208919,
"rewards/semantic_entropy_math_reward": -0.5178813878446817,
"step": 75
},
{
"completion_length": 328.8177137374878,
"epoch": 0.1823908804559772,
"grad_norm": 1.2274174690246582,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.730956120416522,
"reward_std": 0.17488946160301566,
"rewards/semantic_entropy_math_reward": -0.730956120416522,
"step": 76
},
{
"completion_length": 435.6927089691162,
"epoch": 0.1847907604619769,
"grad_norm": 1.0755140781402588,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0360593143850565,
"reward_std": 0.31746397400274873,
"rewards/semantic_entropy_math_reward": -1.0360593143850565,
"step": 77
},
{
"completion_length": 396.92187881469727,
"epoch": 0.1871906404679766,
"grad_norm": 0.49259528517723083,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0349559504538774,
"reward_std": 0.30292816972360015,
"rewards/semantic_entropy_math_reward": -1.0349559504538774,
"step": 78
},
{
"completion_length": 303.45312213897705,
"epoch": 0.18959052047397632,
"grad_norm": 0.49153009057044983,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.5245293974876404,
"reward_std": 0.18332215631380677,
"rewards/semantic_entropy_math_reward": -0.5245293974876404,
"step": 79
},
{
"completion_length": 241.1944465637207,
"epoch": 0.191990400479976,
"grad_norm": 1.309309482574463,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.7875159978866577,
"reward_std": 0.2404517256654799,
"rewards/semantic_entropy_math_reward": -0.7875159978866577,
"step": 80
},
{
"completion_length": 106.01736164093018,
"epoch": 0.1943902804859757,
"grad_norm": 1.159730076789856,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.3929096572101116,
"reward_std": 0.10203729756176472,
"rewards/semantic_entropy_math_reward": -0.3929096572101116,
"step": 81
},
{
"completion_length": 64.85590314865112,
"epoch": 0.1967901604919754,
"grad_norm": 2.622030258178711,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.7893553748726845,
"reward_std": 0.1765661663375795,
"rewards/semantic_entropy_math_reward": -0.7893553748726845,
"step": 82
},
{
"completion_length": 233.77778005599976,
"epoch": 0.1991900404979751,
"grad_norm": 6.741254806518555,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.8342948034405708,
"reward_std": 0.30480349250137806,
"rewards/semantic_entropy_math_reward": -0.8342948034405708,
"step": 83
},
{
"completion_length": 252.50521039962769,
"epoch": 0.2015899205039748,
"grad_norm": 6.380768299102783,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1298357415944338,
"reward_std": 0.4231584039516747,
"rewards/semantic_entropy_math_reward": -1.1298357415944338,
"step": 84
},
{
"completion_length": 85.42708444595337,
"epoch": 0.2039898005099745,
"grad_norm": 1.255273461341858,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.9342016261070967,
"reward_std": 0.37867429945617914,
"rewards/semantic_entropy_math_reward": -0.9342016261070967,
"step": 85
},
{
"completion_length": 83.58854204416275,
"epoch": 0.2063896805159742,
"grad_norm": 1.491475224494934,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1672791484743357,
"reward_std": 0.411985841114074,
"rewards/semantic_entropy_math_reward": -1.1672791484743357,
"step": 86
},
{
"completion_length": 147.82638984918594,
"epoch": 0.2087895605219739,
"grad_norm": 1.796517252922058,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.186328262090683,
"reward_std": 0.5614343388006091,
"rewards/semantic_entropy_math_reward": -1.186328262090683,
"step": 87
},
{
"completion_length": 266.8281271457672,
"epoch": 0.2111894405279736,
"grad_norm": 0.8293857574462891,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.7227916922420263,
"reward_std": 0.5203290600329638,
"rewards/semantic_entropy_math_reward": -0.7227916922420263,
"step": 88
},
{
"completion_length": 419.9079918861389,
"epoch": 0.2135893205339733,
"grad_norm": 1.5567965507507324,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.6697393516078591,
"reward_std": 0.3471951074898243,
"rewards/semantic_entropy_math_reward": -0.6697393516078591,
"step": 89
},
{
"completion_length": 848.0347213745117,
"epoch": 0.215989200539973,
"grad_norm": 0.7054896354675293,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.4150713551789522,
"reward_std": 0.18047819379717112,
"rewards/semantic_entropy_math_reward": -0.4150713551789522,
"step": 90
},
{
"completion_length": 508.8593807220459,
"epoch": 0.2183890805459727,
"grad_norm": 3.7869491577148438,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.4939053989946842,
"reward_std": 0.6185994260013103,
"rewards/semantic_entropy_math_reward": -1.4939053989946842,
"step": 91
},
{
"completion_length": 589.859375,
"epoch": 0.2207889605519724,
"grad_norm": 0.05026252567768097,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.0941957477480173,
"reward_std": 0.5725127439945936,
"rewards/semantic_entropy_math_reward": -1.0941957477480173,
"step": 92
},
{
"completion_length": 605.2968769073486,
"epoch": 0.2231888405579721,
"grad_norm": 0.056097887456417084,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.2516395896673203,
"reward_std": 0.6146153416484594,
"rewards/semantic_entropy_math_reward": -1.2516395896673203,
"step": 93
},
{
"completion_length": 539.8055553436279,
"epoch": 0.2255887205639718,
"grad_norm": 0.07102109491825104,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.312748797237873,
"reward_std": 0.5412911372259259,
"rewards/semantic_entropy_math_reward": -1.312748797237873,
"step": 94
},
{
"completion_length": 502.1197929382324,
"epoch": 0.2279886005699715,
"grad_norm": 0.06010741740465164,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.9112661816179752,
"reward_std": 0.5259749758988619,
"rewards/semantic_entropy_math_reward": -0.9112661816179752,
"step": 95
},
{
"completion_length": 546.5416793823242,
"epoch": 0.2303884805759712,
"grad_norm": 0.05066002905368805,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.128136644139886,
"reward_std": 0.5719058271497488,
"rewards/semantic_entropy_math_reward": -1.128136644139886,
"step": 96
},
{
"completion_length": 492.8194522857666,
"epoch": 0.2327883605819709,
"grad_norm": 0.0559186227619648,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -1.1546523049473763,
"reward_std": 0.5111725647002459,
"rewards/semantic_entropy_math_reward": -1.1546523049473763,
"step": 97
},
{
"completion_length": 473.44097900390625,
"epoch": 0.2351882405879706,
"grad_norm": 0.05957731232047081,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.9957732241600752,
"reward_std": 0.5401759054511786,
"rewards/semantic_entropy_math_reward": -0.9957732241600752,
"step": 98
},
{
"completion_length": 489.8177146911621,
"epoch": 0.2375881205939703,
"grad_norm": 0.06533516943454742,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.8187971916049719,
"reward_std": 0.4784417259506881,
"rewards/semantic_entropy_math_reward": -0.8187971916049719,
"step": 99
},
{
"completion_length": 571.5191040039062,
"epoch": 0.23998800059997,
"grad_norm": 0.25294971466064453,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.9735191389918327,
"reward_std": 0.6345395464450121,
"rewards/semantic_entropy_math_reward": -0.9735191389918327,
"step": 100
},
{
"completion_length": 400.6996593475342,
"epoch": 0.2423878806059697,
"grad_norm": 0.3172270357608795,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.6215638350695372,
"reward_std": 0.38559078332036734,
"rewards/semantic_entropy_math_reward": -0.6215638350695372,
"step": 101
},
{
"completion_length": 238.6892409324646,
"epoch": 0.2447877606119694,
"grad_norm": 1.6551789045333862,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.39494438795372844,
"reward_std": 0.3323068944737315,
"rewards/semantic_entropy_math_reward": -0.39494438795372844,
"step": 102
},
{
"completion_length": 10.09375,
"epoch": 0.2471876406179691,
"grad_norm": 0.430303156375885,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.0410688160918653,
"reward_std": 0.07188181672245264,
"rewards/semantic_entropy_math_reward": -0.0410688160918653,
"step": 103
},
{
"completion_length": 10.078125059604645,
"epoch": 0.2495875206239688,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 104
},
{
"completion_length": 10.062499940395355,
"epoch": 0.2519874006299685,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 105
},
{
"completion_length": 10.045138835906982,
"epoch": 0.2543872806359682,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 106
},
{
"completion_length": 10.057291746139526,
"epoch": 0.2567871606419679,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 107
},
{
"completion_length": 10.008680582046509,
"epoch": 0.2591870406479676,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 108
},
{
"completion_length": 10.036458432674408,
"epoch": 0.2615869206539673,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 109
},
{
"completion_length": 10.032986104488373,
"epoch": 0.263986800659967,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 110
},
{
"completion_length": 10.013888835906982,
"epoch": 0.2663866806659667,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 111
},
{
"completion_length": 10.052083313465118,
"epoch": 0.2687865606719664,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 112
},
{
"completion_length": 10.013888895511627,
"epoch": 0.2711864406779661,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 113
},
{
"completion_length": 10.020833313465118,
"epoch": 0.2735863206839658,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 114
},
{
"completion_length": 10.019097208976746,
"epoch": 0.2759862006899655,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 115
},
{
"completion_length": 10.071180582046509,
"epoch": 0.2783860806959652,
"grad_norm": 0.2876928746700287,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": -0.011715315282344818,
"reward_std": 0.010351377539336681,
"rewards/semantic_entropy_math_reward": -0.011715315282344818,
"step": 116
},
{
"completion_length": 10.093750059604645,
"epoch": 0.2807859607019649,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 117
},
{
"completion_length": 10.124999940395355,
"epoch": 0.2831858407079646,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 118
},
{
"completion_length": 10.423611223697662,
"epoch": 0.2855857207139643,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 119
},
{
"completion_length": 10.47569453716278,
"epoch": 0.287985600719964,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 120
},
{
"completion_length": 10.59375011920929,
"epoch": 0.2903854807259637,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 121
},
{
"completion_length": 10.640625178813934,
"epoch": 0.29278536073196343,
"grad_norm": 0.23409874737262726,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": -0.005975749809294939,
"reward_std": 0.014421098865568638,
"rewards/semantic_entropy_math_reward": -0.005975749809294939,
"step": 122
},
{
"completion_length": 10.890625178813934,
"epoch": 0.2951852407379631,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 123
},
{
"completion_length": 10.881944596767426,
"epoch": 0.2975851207439628,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 124
},
{
"completion_length": 10.994791686534882,
"epoch": 0.2999850007499625,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 125
},
{
"completion_length": 10.973958432674408,
"epoch": 0.3023848807559622,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 126
},
{
"completion_length": 10.987847208976746,
"epoch": 0.3047847607619619,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 127
},
{
"completion_length": 10.996527791023254,
"epoch": 0.3071846407679616,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 128
},
{
"completion_length": 10.984375,
"epoch": 0.3095845207739613,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 129
},
{
"completion_length": 10.998263895511627,
"epoch": 0.311984400779961,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 130
},
{
"completion_length": 10.993055582046509,
"epoch": 0.3143842807859607,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 131
},
{
"completion_length": 11.0,
"epoch": 0.3167841607919604,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 132
},
{
"completion_length": 11.01909726858139,
"epoch": 0.3191840407979601,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 133
},
{
"completion_length": 11.0,
"epoch": 0.3215839208039598,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 134
},
{
"completion_length": 11.0,
"epoch": 0.3239838008099595,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 135
},
{
"completion_length": 10.994791686534882,
"epoch": 0.3263836808159592,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 136
},
{
"completion_length": 10.998263895511627,
"epoch": 0.3287835608219589,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 137
},
{
"completion_length": 11.0,
"epoch": 0.3311834408279586,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 138
},
{
"completion_length": 10.996527791023254,
"epoch": 0.3335833208339583,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 139
},
{
"completion_length": 10.998263895511627,
"epoch": 0.33598320083995803,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 140
},
{
"completion_length": 11.0,
"epoch": 0.3383830808459577,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 141
},
{
"completion_length": 10.996527791023254,
"epoch": 0.3407829608519574,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 142
},
{
"completion_length": 11.0,
"epoch": 0.3431828408579571,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 143
},
{
"completion_length": 11.017361164093018,
"epoch": 0.3455827208639568,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 144
},
{
"completion_length": 10.998263895511627,
"epoch": 0.3479826008699565,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 145
},
{
"completion_length": 10.996527791023254,
"epoch": 0.3503824808759562,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 146
},
{
"completion_length": 11.0,
"epoch": 0.3527823608819559,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 147
},
{
"completion_length": 11.0,
"epoch": 0.3551822408879556,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 148
},
{
"completion_length": 11.0,
"epoch": 0.3575821208939553,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 149
},
{
"completion_length": 10.998263895511627,
"epoch": 0.359982000899955,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 150
},
{
"completion_length": 11.0,
"epoch": 0.3623818809059547,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 151
},
{
"completion_length": 11.0,
"epoch": 0.3647817609119544,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 152
},
{
"completion_length": 10.994791686534882,
"epoch": 0.3671816409179541,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 153
},
{
"completion_length": 10.984375,
"epoch": 0.3695815209239538,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 154
},
{
"completion_length": 11.0,
"epoch": 0.3719814009299535,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 155
},
{
"completion_length": 10.998263895511627,
"epoch": 0.3743812809359532,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 156
},
{
"completion_length": 10.993055582046509,
"epoch": 0.3767811609419529,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 157
},
{
"completion_length": 11.0,
"epoch": 0.37918104094795263,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 158
},
{
"completion_length": 11.013888895511627,
"epoch": 0.3815809209539523,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 159
},
{
"completion_length": 11.0,
"epoch": 0.383980800959952,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 160
},
{
"completion_length": 11.0,
"epoch": 0.3863806809659517,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 161
},
{
"completion_length": 11.0,
"epoch": 0.3887805609719514,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 162
},
{
"completion_length": 10.996527791023254,
"epoch": 0.3911804409779511,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 163
},
{
"completion_length": 11.0,
"epoch": 0.3935803209839508,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 164
},
{
"completion_length": 11.024305582046509,
"epoch": 0.3959802009899505,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 165
},
{
"completion_length": 10.996527791023254,
"epoch": 0.3983800809959502,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 166
},
{
"completion_length": 10.996527791023254,
"epoch": 0.4007799610019499,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 167
},
{
"completion_length": 11.0,
"epoch": 0.4031798410079496,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 168
},
{
"completion_length": 10.998263895511627,
"epoch": 0.4055797210139493,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 169
},
{
"completion_length": 10.994791686534882,
"epoch": 0.407979601019949,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 170
},
{
"completion_length": 11.0,
"epoch": 0.4103794810259487,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 171
},
{
"completion_length": 11.0,
"epoch": 0.4127793610319484,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 172
},
{
"completion_length": 10.998263895511627,
"epoch": 0.4151792410379481,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 173
},
{
"completion_length": 11.0,
"epoch": 0.4175791210439478,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 174
},
{
"completion_length": 11.0,
"epoch": 0.4199790010499475,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 175
},
{
"completion_length": 11.0,
"epoch": 0.4223788810559472,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 176
},
{
"completion_length": 10.998263895511627,
"epoch": 0.4247787610619469,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 177
},
{
"completion_length": 11.0,
"epoch": 0.4271786410679466,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 178
},
{
"completion_length": 10.996527791023254,
"epoch": 0.4295785210739463,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 179
},
{
"completion_length": 10.998263895511627,
"epoch": 0.431978401079946,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 180
},
{
"completion_length": 10.998263895511627,
"epoch": 0.4343782810859457,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 181
},
{
"completion_length": 10.980902791023254,
"epoch": 0.4367781610919454,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 182
},
{
"completion_length": 11.0,
"epoch": 0.4391780410979451,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 183
},
{
"completion_length": 11.0,
"epoch": 0.4415779211039448,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 184
},
{
"completion_length": 11.0,
"epoch": 0.4439778011099445,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 185
},
{
"completion_length": 10.986111104488373,
"epoch": 0.4463776811159442,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 186
},
{
"completion_length": 10.994791686534882,
"epoch": 0.4487775611219439,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 187
},
{
"completion_length": 10.998263895511627,
"epoch": 0.4511774411279436,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 188
},
{
"completion_length": 10.984375,
"epoch": 0.4535773211339433,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 189
},
{
"completion_length": 10.996527791023254,
"epoch": 0.455977201139943,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 190
},
{
"completion_length": 10.998263895511627,
"epoch": 0.4583770811459427,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 191
},
{
"completion_length": 11.0,
"epoch": 0.4607769611519424,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 192
},
{
"completion_length": 11.0,
"epoch": 0.4631768411579421,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 193
},
{
"completion_length": 11.0,
"epoch": 0.4655767211639418,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 194
},
{
"completion_length": 11.0,
"epoch": 0.4679766011699415,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 195
},
{
"completion_length": 10.989583313465118,
"epoch": 0.4703764811759412,
"grad_norm": 0.07069958746433258,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.005975749809294939,
"reward_std": 0.014421098865568638,
"rewards/semantic_entropy_math_reward": -0.005975749809294939,
"step": 196
},
{
"completion_length": 10.998263895511627,
"epoch": 0.4727763611819409,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 197
},
{
"completion_length": 11.0,
"epoch": 0.4751762411879406,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 198
},
{
"completion_length": 11.0,
"epoch": 0.47757612119394033,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 199
},
{
"completion_length": 11.0,
"epoch": 0.47997600119994,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 200
},
{
"completion_length": 10.996527791023254,
"epoch": 0.4823758812059397,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 201
},
{
"completion_length": 10.998263895511627,
"epoch": 0.4847757612119394,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 202
},
{
"completion_length": 11.0,
"epoch": 0.4871756412179391,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 203
},
{
"completion_length": 11.0,
"epoch": 0.4895755212239388,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 204
},
{
"completion_length": 10.987847208976746,
"epoch": 0.4919754012299385,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 205
},
{
"completion_length": 11.0,
"epoch": 0.4943752812359382,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 206
},
{
"completion_length": 11.0,
"epoch": 0.4967751612419379,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 207
},
{
"completion_length": 11.0,
"epoch": 0.4991750412479376,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 208
},
{
"completion_length": 10.996527791023254,
"epoch": 0.5015749212539373,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 209
},
{
"completion_length": 10.998263895511627,
"epoch": 0.503974801259937,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 210
},
{
"completion_length": 10.980902791023254,
"epoch": 0.5063746812659367,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 211
},
{
"completion_length": 11.0,
"epoch": 0.5087745612719364,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 212
},
{
"completion_length": 11.0,
"epoch": 0.5111744412779361,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 213
},
{
"completion_length": 11.0,
"epoch": 0.5135743212839358,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 214
},
{
"completion_length": 10.996527791023254,
"epoch": 0.5159742012899355,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 215
},
{
"completion_length": 11.0,
"epoch": 0.5183740812959352,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 216
},
{
"completion_length": 11.0,
"epoch": 0.5207739613019349,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 217
},
{
"completion_length": 10.996527791023254,
"epoch": 0.5231738413079347,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 218
},
{
"completion_length": 10.996527791023254,
"epoch": 0.5255737213139343,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 219
},
{
"completion_length": 10.996527791023254,
"epoch": 0.527973601319934,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 220
},
{
"completion_length": 11.020833313465118,
"epoch": 0.5303734813259336,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 221
},
{
"completion_length": 11.0,
"epoch": 0.5327733613319334,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 222
},
{
"completion_length": 10.994791686534882,
"epoch": 0.5351732413379331,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 223
},
{
"completion_length": 11.0,
"epoch": 0.5375731213439328,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 224
},
{
"completion_length": 11.0,
"epoch": 0.5399730013499325,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 225
},
{
"completion_length": 11.0,
"epoch": 0.5423728813559322,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 226
},
{
"completion_length": 11.0,
"epoch": 0.5447727613619319,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 227
},
{
"completion_length": 11.0,
"epoch": 0.5471726413679316,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 228
},
{
"completion_length": 10.98784726858139,
"epoch": 0.5495725213739313,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 229
},
{
"completion_length": 11.0,
"epoch": 0.551972401379931,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 230
},
{
"completion_length": 10.996527791023254,
"epoch": 0.5543722813859308,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 231
},
{
"completion_length": 11.0,
"epoch": 0.5567721613919304,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 232
},
{
"completion_length": 11.0,
"epoch": 0.5591720413979301,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 233
},
{
"completion_length": 11.0,
"epoch": 0.5615719214039298,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 234
},
{
"completion_length": 11.0,
"epoch": 0.5639718014099295,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 235
},
{
"completion_length": 10.998263895511627,
"epoch": 0.5663716814159292,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 236
},
{
"completion_length": 10.998263895511627,
"epoch": 0.5687715614219289,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 237
},
{
"completion_length": 10.993055582046509,
"epoch": 0.5711714414279286,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 238
},
{
"completion_length": 11.0,
"epoch": 0.5735713214339283,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 239
},
{
"completion_length": 10.991319477558136,
"epoch": 0.575971201439928,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 240
},
{
"completion_length": 11.0,
"epoch": 0.5783710814459277,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 241
},
{
"completion_length": 11.0,
"epoch": 0.5807709614519274,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 242
},
{
"completion_length": 11.022569477558136,
"epoch": 0.5831708414579271,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 243
},
{
"completion_length": 11.0,
"epoch": 0.5855707214639269,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 244
},
{
"completion_length": 11.0,
"epoch": 0.5879706014699265,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 245
},
{
"completion_length": 11.0,
"epoch": 0.5903704814759262,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 246
},
{
"completion_length": 10.996527791023254,
"epoch": 0.5927703614819259,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 247
},
{
"completion_length": 10.998263895511627,
"epoch": 0.5951702414879256,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 248
},
{
"completion_length": 10.996527791023254,
"epoch": 0.5975701214939253,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 249
},
{
"completion_length": 10.991319477558136,
"epoch": 0.599970001499925,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 250
},
{
"completion_length": 11.0,
"epoch": 0.6023698815059247,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 251
},
{
"completion_length": 11.0,
"epoch": 0.6047697615119244,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 252
},
{
"completion_length": 11.0,
"epoch": 0.6071696415179241,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 253
},
{
"completion_length": 10.994791686534882,
"epoch": 0.6095695215239239,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 254
},
{
"completion_length": 11.0,
"epoch": 0.6119694015299235,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 255
},
{
"completion_length": 11.0,
"epoch": 0.6143692815359232,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 256
},
{
"completion_length": 10.987847208976746,
"epoch": 0.6167691615419229,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 257
},
{
"completion_length": 10.996527791023254,
"epoch": 0.6191690415479226,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 258
},
{
"completion_length": 10.996527791023254,
"epoch": 0.6215689215539223,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 259
},
{
"completion_length": 11.012152791023254,
"epoch": 0.623968801559922,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 260
},
{
"completion_length": 10.998263895511627,
"epoch": 0.6263686815659217,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 261
},
{
"completion_length": 10.998263895511627,
"epoch": 0.6287685615719214,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 262
},
{
"completion_length": 10.996527791023254,
"epoch": 0.6311684415779211,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 263
},
{
"completion_length": 11.0,
"epoch": 0.6335683215839208,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 264
},
{
"completion_length": 10.998263895511627,
"epoch": 0.6359682015899205,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 265
},
{
"completion_length": 11.0,
"epoch": 0.6383680815959202,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 266
},
{
"completion_length": 11.0,
"epoch": 0.64076796160192,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 267
},
{
"completion_length": 10.996527791023254,
"epoch": 0.6431678416079196,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 268
},
{
"completion_length": 11.0,
"epoch": 0.6455677216139193,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 269
},
{
"completion_length": 11.0,
"epoch": 0.647967601619919,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 270
},
{
"completion_length": 10.998263895511627,
"epoch": 0.6503674816259187,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 271
},
{
"completion_length": 11.0,
"epoch": 0.6527673616319184,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 272
},
{
"completion_length": 11.0,
"epoch": 0.6551672416379181,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 273
},
{
"completion_length": 10.984375059604645,
"epoch": 0.6575671216439178,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 274
},
{
"completion_length": 11.0,
"epoch": 0.6599670016499175,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 275
},
{
"completion_length": 11.0,
"epoch": 0.6623668816559172,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 276
},
{
"completion_length": 11.0,
"epoch": 0.664766761661917,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 277
},
{
"completion_length": 10.998263895511627,
"epoch": 0.6671666416679166,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 278
},
{
"completion_length": 11.0,
"epoch": 0.6695665216739163,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 279
},
{
"completion_length": 11.0,
"epoch": 0.6719664016799161,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 280
},
{
"completion_length": 10.994791686534882,
"epoch": 0.6743662816859157,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 281
},
{
"completion_length": 10.996527791023254,
"epoch": 0.6767661616919154,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 282
},
{
"completion_length": 11.0,
"epoch": 0.6791660416979151,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 283
},
{
"completion_length": 10.980902791023254,
"epoch": 0.6815659217039148,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 284
},
{
"completion_length": 11.0,
"epoch": 0.6839658017099145,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 285
},
{
"completion_length": 10.998263895511627,
"epoch": 0.6863656817159142,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 286
},
{
"completion_length": 10.998263895511627,
"epoch": 0.6887655617219139,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 287
},
{
"completion_length": 11.0,
"epoch": 0.6911654417279136,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 288
},
{
"completion_length": 10.994791686534882,
"epoch": 0.6935653217339133,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 289
},
{
"completion_length": 10.998263895511627,
"epoch": 0.695965201739913,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 290
},
{
"completion_length": 11.0,
"epoch": 0.6983650817459127,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 291
},
{
"completion_length": 11.0,
"epoch": 0.7007649617519124,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 292
},
{
"completion_length": 11.0,
"epoch": 0.703164841757912,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 293
},
{
"completion_length": 10.987847208976746,
"epoch": 0.7055647217639118,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 294
},
{
"completion_length": 10.994791686534882,
"epoch": 0.7079646017699115,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 295
},
{
"completion_length": 11.0,
"epoch": 0.7103644817759112,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 296
},
{
"completion_length": 11.0,
"epoch": 0.7127643617819109,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 297
},
{
"completion_length": 11.0,
"epoch": 0.7151642417879106,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 298
},
{
"completion_length": 10.996527791023254,
"epoch": 0.7175641217939103,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 299
},
{
"completion_length": 11.0,
"epoch": 0.71996400179991,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 300
},
{
"completion_length": 11.0,
"epoch": 0.7223638818059097,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 301
},
{
"completion_length": 11.0,
"epoch": 0.7247637618119094,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 302
},
{
"completion_length": 11.0,
"epoch": 0.7271636418179092,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 303
},
{
"completion_length": 11.0,
"epoch": 0.7295635218239088,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 304
},
{
"completion_length": 10.998263895511627,
"epoch": 0.7319634018299085,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 305
},
{
"completion_length": 11.0,
"epoch": 0.7343632818359082,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 306
},
{
"completion_length": 11.0,
"epoch": 0.7367631618419079,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 307
},
{
"completion_length": 10.996527791023254,
"epoch": 0.7391630418479076,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 308
},
{
"completion_length": 10.993055582046509,
"epoch": 0.7415629218539073,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 309
},
{
"completion_length": 11.0,
"epoch": 0.743962801859907,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 310
},
{
"completion_length": 11.0,
"epoch": 0.7463626818659067,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 311
},
{
"completion_length": 11.0,
"epoch": 0.7487625618719064,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 312
},
{
"completion_length": 11.0,
"epoch": 0.7511624418779062,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 313
},
{
"completion_length": 10.998263895511627,
"epoch": 0.7535623218839058,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 314
},
{
"completion_length": 10.993055582046509,
"epoch": 0.7559622018899055,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 315
},
{
"completion_length": 10.996527791023254,
"epoch": 0.7583620818959053,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 316
},
{
"completion_length": 10.998263895511627,
"epoch": 0.7607619619019049,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 317
},
{
"completion_length": 11.0,
"epoch": 0.7631618419079046,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 318
},
{
"completion_length": 10.998263895511627,
"epoch": 0.7655617219139043,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 319
},
{
"completion_length": 11.0,
"epoch": 0.767961601919904,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 320
},
{
"completion_length": 10.993055582046509,
"epoch": 0.7703614819259037,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 321
},
{
"completion_length": 11.0,
"epoch": 0.7727613619319034,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 322
},
{
"completion_length": 10.979166686534882,
"epoch": 0.7751612419379031,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 323
},
{
"completion_length": 11.0,
"epoch": 0.7775611219439028,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 324
},
{
"completion_length": 10.994791686534882,
"epoch": 0.7799610019499025,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 325
},
{
"completion_length": 10.991319477558136,
"epoch": 0.7823608819559023,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 326
},
{
"completion_length": 11.0,
"epoch": 0.7847607619619019,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 327
},
{
"completion_length": 11.0,
"epoch": 0.7871606419679016,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 328
},
{
"completion_length": 11.0,
"epoch": 0.7895605219739013,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 329
},
{
"completion_length": 11.0,
"epoch": 0.791960401979901,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 330
},
{
"completion_length": 10.993055582046509,
"epoch": 0.7943602819859007,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 331
},
{
"completion_length": 10.98784726858139,
"epoch": 0.7967601619919004,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 332
},
{
"completion_length": 11.0,
"epoch": 0.7991600419979001,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 333
},
{
"completion_length": 11.0,
"epoch": 0.8015599220038998,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 334
},
{
"completion_length": 10.996527791023254,
"epoch": 0.8039598020098995,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 335
},
{
"completion_length": 10.998263895511627,
"epoch": 0.8063596820158992,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 336
},
{
"completion_length": 10.998263895511627,
"epoch": 0.8087595620218989,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 337
},
{
"completion_length": 10.998263895511627,
"epoch": 0.8111594420278986,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 338
},
{
"completion_length": 10.996527791023254,
"epoch": 0.8135593220338984,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 339
},
{
"completion_length": 11.0,
"epoch": 0.815959202039898,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 340
},
{
"completion_length": 11.0,
"epoch": 0.8183590820458977,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 341
},
{
"completion_length": 11.0,
"epoch": 0.8207589620518974,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 342
},
{
"completion_length": 11.0,
"epoch": 0.8231588420578971,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 343
},
{
"completion_length": 11.0,
"epoch": 0.8255587220638968,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 344
},
{
"completion_length": 10.998263895511627,
"epoch": 0.8279586020698965,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 345
},
{
"completion_length": 10.998263895511627,
"epoch": 0.8303584820758962,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 346
},
{
"completion_length": 11.0,
"epoch": 0.8327583620818959,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 347
},
{
"completion_length": 11.0,
"epoch": 0.8351582420878956,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 348
},
{
"completion_length": 11.0,
"epoch": 0.8375581220938954,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 349
},
{
"completion_length": 10.998263895511627,
"epoch": 0.839958002099895,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 350
},
{
"completion_length": 10.979166686534882,
"epoch": 0.8423578821058947,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 351
},
{
"completion_length": 11.0,
"epoch": 0.8447577621118944,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 352
},
{
"completion_length": 11.0,
"epoch": 0.8471576421178941,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 353
},
{
"completion_length": 11.0,
"epoch": 0.8495575221238938,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 354
},
{
"completion_length": 11.0,
"epoch": 0.8519574021298935,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 355
},
{
"completion_length": 11.0,
"epoch": 0.8543572821358932,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 356
},
{
"completion_length": 11.0,
"epoch": 0.8567571621418929,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 357
},
{
"completion_length": 11.0,
"epoch": 0.8591570421478926,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 358
},
{
"completion_length": 10.998263895511627,
"epoch": 0.8615569221538923,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 359
},
{
"completion_length": 11.0,
"epoch": 0.863956802159892,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 360
},
{
"completion_length": 11.0,
"epoch": 0.8663566821658917,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 361
},
{
"completion_length": 11.022569477558136,
"epoch": 0.8687565621718915,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 362
},
{
"completion_length": 11.0,
"epoch": 0.8711564421778911,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 363
},
{
"completion_length": 10.998263895511627,
"epoch": 0.8735563221838908,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 364
},
{
"completion_length": 10.998263895511627,
"epoch": 0.8759562021898905,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 365
},
{
"completion_length": 11.0,
"epoch": 0.8783560821958902,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 366
},
{
"completion_length": 11.0,
"epoch": 0.8807559622018899,
"grad_norm": 0.02790955826640129,
"learning_rate": 1e-06,
"loss": -0.0,
"reward": -0.005975749343633652,
"reward_std": 0.014421098865568638,
"rewards/semantic_entropy_math_reward": -0.005975749343633652,
"step": 367
},
{
"completion_length": 11.0,
"epoch": 0.8831558422078896,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 368
},
{
"completion_length": 11.0,
"epoch": 0.8855557222138893,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 369
},
{
"completion_length": 11.0,
"epoch": 0.887955602219889,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 370
},
{
"completion_length": 11.0,
"epoch": 0.8903554822258887,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 371
},
{
"completion_length": 11.0,
"epoch": 0.8927553622318884,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 372
},
{
"completion_length": 10.996527791023254,
"epoch": 0.8951552422378881,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 373
},
{
"completion_length": 11.0,
"epoch": 0.8975551222438878,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 374
},
{
"completion_length": 10.996527791023254,
"epoch": 0.8999550022498876,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 375
},
{
"completion_length": 11.0,
"epoch": 0.9023548822558872,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 376
},
{
"completion_length": 10.998263895511627,
"epoch": 0.9047547622618869,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 377
},
{
"completion_length": 11.0,
"epoch": 0.9071546422678866,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 378
},
{
"completion_length": 11.0,
"epoch": 0.9095545222738863,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 379
},
{
"completion_length": 11.0,
"epoch": 0.911954402279886,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 380
},
{
"completion_length": 11.0,
"epoch": 0.9143542822858857,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 381
},
{
"completion_length": 10.998263895511627,
"epoch": 0.9167541622918854,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 382
},
{
"completion_length": 11.0,
"epoch": 0.9191540422978851,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 383
},
{
"completion_length": 10.993055582046509,
"epoch": 0.9215539223038848,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 384
},
{
"completion_length": 10.979166686534882,
"epoch": 0.9239538023098846,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 385
},
{
"completion_length": 10.994791686534882,
"epoch": 0.9263536823158842,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 386
},
{
"completion_length": 10.996527791023254,
"epoch": 0.9287535623218839,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 387
},
{
"completion_length": 10.998263895511627,
"epoch": 0.9311534423278836,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 388
},
{
"completion_length": 10.996527791023254,
"epoch": 0.9335533223338833,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 389
},
{
"completion_length": 11.0,
"epoch": 0.935953202339883,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 390
},
{
"completion_length": 10.998263895511627,
"epoch": 0.9383530823458827,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 391
},
{
"completion_length": 11.0,
"epoch": 0.9407529623518824,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 392
},
{
"completion_length": 10.998263895511627,
"epoch": 0.9431528423578821,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 393
},
{
"completion_length": 11.0,
"epoch": 0.9455527223638818,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 394
},
{
"completion_length": 10.998263895511627,
"epoch": 0.9479526023698815,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 395
},
{
"completion_length": 11.0,
"epoch": 0.9503524823758812,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 396
},
{
"completion_length": 10.996527791023254,
"epoch": 0.9527523623818809,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 397
},
{
"completion_length": 11.0,
"epoch": 0.9551522423878807,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 398
},
{
"completion_length": 11.0,
"epoch": 0.9575521223938803,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 399
},
{
"completion_length": 10.996527791023254,
"epoch": 0.95995200239988,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 400
},
{
"completion_length": 11.0,
"epoch": 0.9623518824058797,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 401
},
{
"completion_length": 10.998263895511627,
"epoch": 0.9647517624118794,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 402
},
{
"completion_length": 10.998263895511627,
"epoch": 0.9671516424178791,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 403
},
{
"completion_length": 10.996527791023254,
"epoch": 0.9695515224238788,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 404
},
{
"completion_length": 10.994791686534882,
"epoch": 0.9719514024298785,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 405
},
{
"completion_length": 11.038194477558136,
"epoch": 0.9743512824358782,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 406
},
{
"completion_length": 11.0,
"epoch": 0.9767511624418779,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 407
},
{
"completion_length": 11.0,
"epoch": 0.9791510424478777,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 408
},
{
"completion_length": 10.998263895511627,
"epoch": 0.9815509224538773,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 409
},
{
"completion_length": 11.0,
"epoch": 0.983950802459877,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 410
},
{
"completion_length": 10.998263895511627,
"epoch": 0.9863506824658768,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 411
},
{
"completion_length": 11.0,
"epoch": 0.9887505624718764,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 412
},
{
"completion_length": 11.0,
"epoch": 0.9911504424778761,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 413
},
{
"completion_length": 11.0,
"epoch": 0.9935503224838758,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 414
},
{
"completion_length": 11.0,
"epoch": 0.9959502024898755,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 415
},
{
"completion_length": 11.0,
"epoch": 0.9983500824958752,
"grad_norm": 0.0,
"learning_rate": 1e-06,
"loss": 0.0,
"reward": 0.0,
"reward_std": 0.0,
"rewards/semantic_entropy_math_reward": 0.0,
"step": 416
},
{
"epoch": 0.9983500824958752,
"step": 416,
"total_flos": 0.0,
"train_loss": -8.8036603985719e-09,
"train_runtime": 28166.6905,
"train_samples_per_second": 0.71,
"train_steps_per_second": 0.015
}
],
"logging_steps": 1,
"max_steps": 416,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}