{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 14.895626822157434, "eval_steps": 500, "global_step": 1440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014927455357142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4068.0, "completions/mean_length": 604.30419921875, "completions/mean_terminated_length": 551.3922119140625, "completions/min_length": 10.0, "completions/min_terminated_length": 10.0, "epoch": 0.009329446064139942, "grad_norm": 0.15262630581855774, "learning_rate": 7.5e-07, "loss": 0.0054, "num_tokens": 9225409.0, "reward": 0.4954659938812256, "reward_std": 0.2608823776245117, "rewards/simpleverify_reward/mean": 0.4954659640789032, "rewards/simpleverify_reward/std": 0.4999968409538269, "step": 1 }, { "clip_ratio/high_max": 0.0021828670796821825, "clip_ratio/high_mean": 0.0010430404508952051, "clip_ratio/low_mean": 0.0005800386825285386, "clip_ratio/low_min": 3.6041175917489454e-05, "clip_ratio/region_mean": 0.001623079111595871, "epoch": 0.018658892128279883, "grad_norm": 0.13722161948680878, "learning_rate": 7.5e-07, "loss": -0.0247, "step": 2 }, { "clip_ratio/high_max": 0.0024839645775500685, "clip_ratio/high_mean": 0.0010180762346863048, "clip_ratio/low_mean": 0.0007331500710279215, "clip_ratio/low_min": 9.60086726990994e-05, "clip_ratio/region_mean": 0.0017512262056698091, "epoch": 0.027988338192419825, "grad_norm": 0.14988236129283905, "learning_rate": 7.5e-07, "loss": 0.0268, "step": 3 }, { "clip_ratio/high_max": 0.0026332422057748772, "clip_ratio/high_mean": 0.001171677591628395, "clip_ratio/low_mean": 0.0008862328613759018, "clip_ratio/low_min": 0.00015419408919115085, "clip_ratio/region_mean": 0.0020579104239004664, "epoch": 0.037317784256559766, "grad_norm": 0.1456938534975052, "learning_rate": 7.5e-07, "loss": 0.0513, "step": 4 }, { "clip_ratio/high_max": 0.0029758213277091272, "clip_ratio/high_mean": 0.0013013902498641983, "clip_ratio/low_mean": 0.0009558066994941328, "clip_ratio/low_min": 0.00012350576253084, "clip_ratio/region_mean": 0.0022571969748241827, "epoch": 0.04664723032069971, "grad_norm": 0.14521899819374084, "learning_rate": 7.5e-07, "loss": -0.014, "step": 5 }, { "clip_ratio/high_max": 0.0028507656388683245, "clip_ratio/high_mean": 0.0011477734442451037, "clip_ratio/low_mean": 0.0008111505831038812, "clip_ratio/low_min": 9.107571986533003e-05, "clip_ratio/region_mean": 0.001958924032805953, "epoch": 0.05597667638483965, "grad_norm": 0.1093846932053566, "learning_rate": 7.5e-07, "loss": -0.0237, "step": 6 }, { "clip_ratio/high_max": 0.002388161519775167, "clip_ratio/high_mean": 0.0011495671642478555, "clip_ratio/low_mean": 0.0012097475337213837, "clip_ratio/low_min": 0.00019700598750205245, "clip_ratio/region_mean": 0.002359314705245197, "epoch": 0.0653061224489796, "grad_norm": 0.13794727623462677, "learning_rate": 7.5e-07, "loss": -0.0004, "step": 7 }, { "clip_ratio/high_max": 0.0029256779816932976, "clip_ratio/high_mean": 0.00139823788049398, "clip_ratio/low_mean": 0.0011901162324647885, "clip_ratio/low_min": 0.0002595718915472389, "clip_ratio/region_mean": 0.0025883541311486624, "epoch": 0.07463556851311953, "grad_norm": 0.13317736983299255, "learning_rate": 7.5e-07, "loss": -0.0294, "step": 8 }, { "clip_ratio/high_max": 0.0030002644925843924, "clip_ratio/high_mean": 0.0012145928521931637, "clip_ratio/low_mean": 0.0012977576516277622, "clip_ratio/low_min": 0.00019238885579397902, "clip_ratio/region_mean": 0.0025123504747170955, "epoch": 0.08396501457725948, "grad_norm": 0.13477958738803864, "learning_rate": 7.5e-07, "loss": -0.0225, "step": 9 }, { "clip_ratio/high_max": 0.0031748560359119438, "clip_ratio/high_mean": 0.001460789077100344, "clip_ratio/low_mean": 0.0012950791460752953, "clip_ratio/low_min": 0.00020445266909518978, "clip_ratio/region_mean": 0.002755868117674254, "epoch": 0.09329446064139942, "grad_norm": 0.14634506404399872, "learning_rate": 7.5e-07, "loss": 0.0016, "step": 10 }, { "clip_ratio/high_max": 0.002592909244413022, "clip_ratio/high_mean": 0.0011647609790088609, "clip_ratio/low_mean": 0.001416679679095978, "clip_ratio/low_min": 0.00023174261605163338, "clip_ratio/region_mean": 0.0025814406617428176, "epoch": 0.10262390670553936, "grad_norm": 0.1383267492055893, "learning_rate": 7.5e-07, "loss": 0.044, "step": 11 }, { "clip_ratio/high_max": 0.0026922100223600864, "clip_ratio/high_mean": 0.0012577233246702235, "clip_ratio/low_mean": 0.0012580613110912964, "clip_ratio/low_min": 9.133945241046604e-05, "clip_ratio/region_mean": 0.0025157846393994987, "epoch": 0.1119533527696793, "grad_norm": 0.14850546419620514, "learning_rate": 7.5e-07, "loss": -0.0372, "step": 12 }, { "clip_ratio/high_max": 0.0032007027766667306, "clip_ratio/high_mean": 0.0012597993409144692, "clip_ratio/low_mean": 0.0011257350452069659, "clip_ratio/low_min": 7.65014465287095e-05, "clip_ratio/region_mean": 0.0023855344115872867, "epoch": 0.12128279883381925, "grad_norm": 0.12946826219558716, "learning_rate": 7.5e-07, "loss": -0.0288, "step": 13 }, { "clip_ratio/high_max": 0.002284749185491819, "clip_ratio/high_mean": 0.0012079480620741379, "clip_ratio/low_mean": 0.0012544916935439687, "clip_ratio/low_min": 0.00027166882318852004, "clip_ratio/region_mean": 0.002462439784721937, "epoch": 0.1306122448979592, "grad_norm": 0.11876867711544037, "learning_rate": 7.5e-07, "loss": 0.0341, "step": 14 }, { "clip_ratio/high_max": 0.0025012288606376387, "clip_ratio/high_mean": 0.001133116464188788, "clip_ratio/low_mean": 0.001184020013170084, "clip_ratio/low_min": 0.00021401104004326044, "clip_ratio/region_mean": 0.002317136495548766, "epoch": 0.13994169096209913, "grad_norm": 0.13809596002101898, "learning_rate": 7.5e-07, "loss": 0.0363, "step": 15 }, { "clip_ratio/high_max": 0.0025527154284645803, "clip_ratio/high_mean": 0.0011831514275399968, "clip_ratio/low_mean": 0.0010974943543260451, "clip_ratio/low_min": 0.0001039122544170823, "clip_ratio/region_mean": 0.002280645814607851, "epoch": 0.14927113702623906, "grad_norm": 0.1291833072900772, "learning_rate": 7.5e-07, "loss": -0.0026, "step": 16 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013253348214285698, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 605.7340698242188, "completions/mean_terminated_length": 558.8550415039062, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 0.158600583090379, "grad_norm": 0.1443503350019455, "learning_rate": 7.5e-07, "loss": 0.0033, "num_tokens": 18540556.0, "reward": 0.5189732313156128, "reward_std": 0.24857795238494873, "rewards/simpleverify_reward/mean": 0.5189732313156128, "rewards/simpleverify_reward/std": 0.4996573030948639, "step": 17 }, { "clip_ratio/high_max": 0.0025155939292744733, "clip_ratio/high_mean": 0.0010950273353955708, "clip_ratio/low_mean": 0.0005706984047719743, "clip_ratio/low_min": 7.936093243188225e-05, "clip_ratio/region_mean": 0.0016657257292536087, "epoch": 0.16793002915451896, "grad_norm": 0.13847780227661133, "learning_rate": 7.5e-07, "loss": -0.047, "step": 18 }, { "clip_ratio/high_max": 0.002194731219788082, "clip_ratio/high_mean": 0.00095924231573008, "clip_ratio/low_mean": 0.0007444019338436192, "clip_ratio/low_min": 6.650098112004343e-05, "clip_ratio/region_mean": 0.001703644280496519, "epoch": 0.1772594752186589, "grad_norm": 0.14865314960479736, "learning_rate": 7.5e-07, "loss": 0.0215, "step": 19 }, { "clip_ratio/high_max": 0.002392419191892259, "clip_ratio/high_mean": 0.0010380012245150283, "clip_ratio/low_mean": 0.0006614162921323441, "clip_ratio/low_min": 0.00010051229037344456, "clip_ratio/region_mean": 0.0016994175093714148, "epoch": 0.18658892128279883, "grad_norm": 0.12759557366371155, "learning_rate": 7.5e-07, "loss": -0.0109, "step": 20 }, { "clip_ratio/high_max": 0.0023648841088288464, "clip_ratio/high_mean": 0.0010217004164587706, "clip_ratio/low_mean": 0.0005771556225226959, "clip_ratio/low_min": 4.630241164704785e-05, "clip_ratio/region_mean": 0.0015988560262485407, "epoch": 0.19591836734693877, "grad_norm": 0.11540969461202621, "learning_rate": 7.5e-07, "loss": -0.0037, "step": 21 }, { "clip_ratio/high_max": 0.002030924541031709, "clip_ratio/high_mean": 0.0009217612587235635, "clip_ratio/low_mean": 0.0008256337532657199, "clip_ratio/low_min": 7.912302680779248e-05, "clip_ratio/region_mean": 0.0017473950429121032, "epoch": 0.20524781341107873, "grad_norm": 0.12672258913516998, "learning_rate": 7.5e-07, "loss": 0.056, "step": 22 }, { "clip_ratio/high_max": 0.0024981169844977558, "clip_ratio/high_mean": 0.0011217312421649694, "clip_ratio/low_mean": 0.0007321338489418849, "clip_ratio/low_min": 6.140835284895729e-05, "clip_ratio/region_mean": 0.0018538650692789815, "epoch": 0.21457725947521866, "grad_norm": 0.13716308772563934, "learning_rate": 7.5e-07, "loss": -0.0142, "step": 23 }, { "clip_ratio/high_max": 0.0025668708549346775, "clip_ratio/high_mean": 0.0011740433856175514, "clip_ratio/low_mean": 0.0006818987094447948, "clip_ratio/low_min": 3.0852471354592126e-05, "clip_ratio/region_mean": 0.001855942071415484, "epoch": 0.2239067055393586, "grad_norm": 0.14446280896663666, "learning_rate": 7.5e-07, "loss": -0.0584, "step": 24 }, { "clip_ratio/high_max": 0.0020179311832180247, "clip_ratio/high_mean": 0.0010031935962615535, "clip_ratio/low_mean": 0.0008993575665954268, "clip_ratio/low_min": 8.76979702297831e-05, "clip_ratio/region_mean": 0.0019025511282961816, "epoch": 0.23323615160349853, "grad_norm": 0.12553226947784424, "learning_rate": 7.5e-07, "loss": -0.019, "step": 25 }, { "clip_ratio/high_max": 0.00245853366504889, "clip_ratio/high_mean": 0.0011101950985903386, "clip_ratio/low_mean": 0.0008716051634110045, "clip_ratio/low_min": 0.00013052270878688432, "clip_ratio/region_mean": 0.0019818002547253855, "epoch": 0.2425655976676385, "grad_norm": 0.13147656619548798, "learning_rate": 7.5e-07, "loss": -0.0071, "step": 26 }, { "clip_ratio/high_max": 0.002321844491234515, "clip_ratio/high_mean": 0.0010510854881431442, "clip_ratio/low_mean": 0.0009835931741690729, "clip_ratio/low_min": 0.0001354602227365831, "clip_ratio/region_mean": 0.00203467868414009, "epoch": 0.2518950437317784, "grad_norm": 0.14126136898994446, "learning_rate": 7.5e-07, "loss": 0.0156, "step": 27 }, { "clip_ratio/high_max": 0.0024034341622609645, "clip_ratio/high_mean": 0.000982941144684446, "clip_ratio/low_mean": 0.0010366282331233378, "clip_ratio/low_min": 0.00013326032967597712, "clip_ratio/region_mean": 0.002019569401454646, "epoch": 0.2612244897959184, "grad_norm": 0.12237707525491714, "learning_rate": 7.5e-07, "loss": 0.0299, "step": 28 }, { "clip_ratio/high_max": 0.0027008155520888977, "clip_ratio/high_mean": 0.0012702350541076157, "clip_ratio/low_mean": 0.001234258190379478, "clip_ratio/low_min": 0.00018924228606920224, "clip_ratio/region_mean": 0.0025044932335731573, "epoch": 0.2705539358600583, "grad_norm": 0.13398613035678864, "learning_rate": 7.5e-07, "loss": 0.0023, "step": 29 }, { "clip_ratio/high_max": 0.002312462165718898, "clip_ratio/high_mean": 0.001096550840884447, "clip_ratio/low_mean": 0.0010547848760324996, "clip_ratio/low_min": 0.00019179949231329374, "clip_ratio/region_mean": 0.0021513357787625864, "epoch": 0.27988338192419826, "grad_norm": 0.12476468831300735, "learning_rate": 7.5e-07, "loss": 0.0042, "step": 30 }, { "clip_ratio/high_max": 0.002269750642881263, "clip_ratio/high_mean": 0.0010518722956476267, "clip_ratio/low_mean": 0.0012249405936017865, "clip_ratio/low_min": 0.000124344904179452, "clip_ratio/region_mean": 0.002276812920172233, "epoch": 0.2892128279883382, "grad_norm": 0.11031439155340195, "learning_rate": 7.5e-07, "loss": 0.0145, "step": 31 }, { "clip_ratio/high_max": 0.0022832488393760286, "clip_ratio/high_mean": 0.0010068188184959581, "clip_ratio/low_mean": 0.0011639767162705539, "clip_ratio/low_min": 0.0001869746902229963, "clip_ratio/region_mean": 0.0021707955238525756, "epoch": 0.29854227405247813, "grad_norm": 0.12717770040035248, "learning_rate": 7.5e-07, "loss": -0.0028, "step": 32 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015276227678571397, "completions/max_length": 4096.0, "completions/max_terminated_length": 4040.0, "completions/mean_length": 616.2281494140625, "completions/mean_terminated_length": 562.2456665039062, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.30787172011661806, "grad_norm": 0.13035358488559723, "learning_rate": 7.5e-07, "loss": 0.001, "num_tokens": 27915162.0, "reward": 0.5223912000656128, "reward_std": 0.22965237498283386, "rewards/simpleverify_reward/mean": 0.5223912000656128, "rewards/simpleverify_reward/std": 0.4995158016681671, "step": 33 }, { "clip_ratio/high_max": 0.0022141269291751087, "clip_ratio/high_mean": 0.000886543704837095, "clip_ratio/low_mean": 0.0005605877804555348, "clip_ratio/low_min": 2.6843235900742002e-05, "clip_ratio/region_mean": 0.001447131475288188, "epoch": 0.317201166180758, "grad_norm": 0.12810876965522766, "learning_rate": 7.5e-07, "loss": -0.0064, "step": 34 }, { "clip_ratio/high_max": 0.0019310583084006794, "clip_ratio/high_mean": 0.0008215308407670818, "clip_ratio/low_mean": 0.0006271193124121055, "clip_ratio/low_min": 4.523454026639229e-05, "clip_ratio/region_mean": 0.0014486501495412085, "epoch": 0.32653061224489793, "grad_norm": 0.13073325157165527, "learning_rate": 7.5e-07, "loss": 0.0392, "step": 35 }, { "clip_ratio/high_max": 0.0020206386179779656, "clip_ratio/high_mean": 0.000987433631962631, "clip_ratio/low_mean": 0.0006216783513082191, "clip_ratio/low_min": 4.707551124738529e-05, "clip_ratio/region_mean": 0.001609112019650638, "epoch": 0.3358600583090379, "grad_norm": 0.13293327391147614, "learning_rate": 7.5e-07, "loss": -0.0385, "step": 36 }, { "clip_ratio/high_max": 0.0022661817638436332, "clip_ratio/high_mean": 0.0009414966370968614, "clip_ratio/low_mean": 0.0006957539899303811, "clip_ratio/low_min": 8.440471992798848e-05, "clip_ratio/region_mean": 0.0016372505997424014, "epoch": 0.34518950437317786, "grad_norm": 0.12524276971817017, "learning_rate": 7.5e-07, "loss": -0.0409, "step": 37 }, { "clip_ratio/high_max": 0.0023337756720138714, "clip_ratio/high_mean": 0.000931976806896273, "clip_ratio/low_mean": 0.0006870176566735609, "clip_ratio/low_min": 1.528864959254861e-05, "clip_ratio/region_mean": 0.0016189944872166961, "epoch": 0.3545189504373178, "grad_norm": 0.12289758771657944, "learning_rate": 7.5e-07, "loss": -0.0399, "step": 38 }, { "clip_ratio/high_max": 0.0020693456535809673, "clip_ratio/high_mean": 0.0009962830154108815, "clip_ratio/low_mean": 0.0008399674552492797, "clip_ratio/low_min": 3.6390101740835235e-05, "clip_ratio/region_mean": 0.0018362504633842036, "epoch": 0.3638483965014577, "grad_norm": 0.12843535840511322, "learning_rate": 7.5e-07, "loss": -0.0138, "step": 39 }, { "clip_ratio/high_max": 0.0021056154946563765, "clip_ratio/high_mean": 0.0010506733397050994, "clip_ratio/low_mean": 0.000891108691575937, "clip_ratio/low_min": 3.750015730474843e-05, "clip_ratio/region_mean": 0.001941782124049496, "epoch": 0.37317784256559766, "grad_norm": 0.12484913319349289, "learning_rate": 7.5e-07, "loss": 0.0429, "step": 40 }, { "clip_ratio/high_max": 0.002062869163637515, "clip_ratio/high_mean": 0.0008534748521924485, "clip_ratio/low_mean": 0.0009273841951653594, "clip_ratio/low_min": 9.9997324468859e-05, "clip_ratio/region_mean": 0.0017808590200729668, "epoch": 0.3825072886297376, "grad_norm": 0.12544208765029907, "learning_rate": 7.5e-07, "loss": 0.0226, "step": 41 }, { "clip_ratio/high_max": 0.002386418251262512, "clip_ratio/high_mean": 0.0010104613902512938, "clip_ratio/low_mean": 0.0009486152175668394, "clip_ratio/low_min": 0.00010511766868148698, "clip_ratio/region_mean": 0.0019590766678447835, "epoch": 0.39183673469387753, "grad_norm": 0.12138772755861282, "learning_rate": 7.5e-07, "loss": 0.013, "step": 42 }, { "clip_ratio/high_max": 0.0022618132643401623, "clip_ratio/high_mean": 0.0010001373884733766, "clip_ratio/low_mean": 0.0008027535322980839, "clip_ratio/low_min": 8.907800838642288e-05, "clip_ratio/region_mean": 0.001802890925318934, "epoch": 0.40116618075801747, "grad_norm": 0.1254751831293106, "learning_rate": 7.5e-07, "loss": -0.0348, "step": 43 }, { "clip_ratio/high_max": 0.002218240544607397, "clip_ratio/high_mean": 0.0009985852775571402, "clip_ratio/low_mean": 0.0010783568868646398, "clip_ratio/low_min": 0.00010571134316705866, "clip_ratio/region_mean": 0.0020769421171280555, "epoch": 0.41049562682215746, "grad_norm": 0.12117357552051544, "learning_rate": 7.5e-07, "loss": 0.0038, "step": 44 }, { "clip_ratio/high_max": 0.0021810904327139724, "clip_ratio/high_mean": 0.0009179512580885785, "clip_ratio/low_mean": 0.0010112958298122976, "clip_ratio/low_min": 0.0001515403628218337, "clip_ratio/region_mean": 0.001929247118823696, "epoch": 0.4198250728862974, "grad_norm": 0.11569032818078995, "learning_rate": 7.5e-07, "loss": 0.0133, "step": 45 }, { "clip_ratio/high_max": 0.0023332869823207147, "clip_ratio/high_mean": 0.0009909142499964219, "clip_ratio/low_mean": 0.000990788612398319, "clip_ratio/low_min": 0.00014422667300095782, "clip_ratio/region_mean": 0.0019817028151010163, "epoch": 0.4291545189504373, "grad_norm": 0.12064990401268005, "learning_rate": 7.5e-07, "loss": -0.0049, "step": 46 }, { "clip_ratio/high_max": 0.002616253710584715, "clip_ratio/high_mean": 0.0010757240088423714, "clip_ratio/low_mean": 0.001025089470203966, "clip_ratio/low_min": 7.510272462241119e-05, "clip_ratio/region_mean": 0.0021008135154261254, "epoch": 0.43848396501457726, "grad_norm": 0.12389708310365677, "learning_rate": 7.5e-07, "loss": 0.0064, "step": 47 }, { "clip_ratio/high_max": 0.0017254929734917823, "clip_ratio/high_mean": 0.0008382853457078454, "clip_ratio/low_mean": 0.0011024527120753191, "clip_ratio/low_min": 9.103407865040936e-05, "clip_ratio/region_mean": 0.0019407380314078182, "epoch": 0.4478134110787172, "grad_norm": 0.11195557564496994, "learning_rate": 7.5e-07, "loss": 0.0304, "step": 48 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.014578683035714302, "completions/max_length": 4096.0, "completions/max_terminated_length": 3930.0, "completions/mean_length": 599.2545166015625, "completions/mean_terminated_length": 547.5223388671875, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 0.45714285714285713, "grad_norm": 0.13535796105861664, "learning_rate": 7.5e-07, "loss": 0.0245, "num_tokens": 37050034.0, "reward": 0.5533621907234192, "reward_std": 0.22587960958480835, "rewards/simpleverify_reward/mean": 0.5533621907234192, "rewards/simpleverify_reward/std": 0.4971616566181183, "step": 49 }, { "clip_ratio/high_max": 0.0023655610493733548, "clip_ratio/high_mean": 0.0009841951650741976, "clip_ratio/low_mean": 0.0006762307330063777, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001660425892623607, "epoch": 0.46647230320699706, "grad_norm": 0.12884028255939484, "learning_rate": 7.5e-07, "loss": 0.042, "step": 50 }, { "clip_ratio/high_max": 0.001919005771924276, "clip_ratio/high_mean": 0.0008840184327709721, "clip_ratio/low_mean": 0.0006199974050105084, "clip_ratio/low_min": 5.866589071956696e-05, "clip_ratio/region_mean": 0.0015040158432384487, "epoch": 0.47580174927113705, "grad_norm": 0.12096419185400009, "learning_rate": 7.5e-07, "loss": 0.0369, "step": 51 }, { "clip_ratio/high_max": 0.0024325455306097865, "clip_ratio/high_mean": 0.0009969210841518361, "clip_ratio/low_mean": 0.0008020549685170408, "clip_ratio/low_min": 9.882146878226195e-05, "clip_ratio/region_mean": 0.0017989760017371736, "epoch": 0.485131195335277, "grad_norm": 0.13347789645195007, "learning_rate": 7.5e-07, "loss": 0.0406, "step": 52 }, { "clip_ratio/high_max": 0.0023233584797708318, "clip_ratio/high_mean": 0.000887008605786832, "clip_ratio/low_mean": 0.0006776669979444705, "clip_ratio/low_min": 4.040324256493477e-05, "clip_ratio/region_mean": 0.001564675651025027, "epoch": 0.4944606413994169, "grad_norm": 0.12443052977323532, "learning_rate": 7.5e-07, "loss": -0.0314, "step": 53 }, { "clip_ratio/high_max": 0.0022277103453234304, "clip_ratio/high_mean": 0.0009176050807582214, "clip_ratio/low_mean": 0.0007205460387922358, "clip_ratio/low_min": 1.8628912584972568e-05, "clip_ratio/region_mean": 0.0016381510758947115, "epoch": 0.5037900874635568, "grad_norm": 0.12363655120134354, "learning_rate": 7.5e-07, "loss": 0.0119, "step": 54 }, { "clip_ratio/high_max": 0.0022800405786256306, "clip_ratio/high_mean": 0.0009724547417135909, "clip_ratio/low_mean": 0.0006870526995044202, "clip_ratio/low_min": 2.7311721169098746e-05, "clip_ratio/region_mean": 0.0016595074266660959, "epoch": 0.5131195335276968, "grad_norm": 0.12403528392314911, "learning_rate": 7.5e-07, "loss": 0.0135, "step": 55 }, { "clip_ratio/high_max": 0.0025900038017425686, "clip_ratio/high_mean": 0.0010997623248840682, "clip_ratio/low_mean": 0.0008103067539195763, "clip_ratio/low_min": 1.8706974515225738e-05, "clip_ratio/region_mean": 0.0019100690769846551, "epoch": 0.5224489795918368, "grad_norm": 0.12608177959918976, "learning_rate": 7.5e-07, "loss": -0.0287, "step": 56 }, { "clip_ratio/high_max": 0.0022254520881688222, "clip_ratio/high_mean": 0.0010691310544643784, "clip_ratio/low_mean": 0.0008073027165664826, "clip_ratio/low_min": 8.356023863598239e-05, "clip_ratio/region_mean": 0.001876433780125808, "epoch": 0.5317784256559767, "grad_norm": 0.12500731647014618, "learning_rate": 7.5e-07, "loss": 0.0114, "step": 57 }, { "clip_ratio/high_max": 0.0022869183449074626, "clip_ratio/high_mean": 0.001007811963063432, "clip_ratio/low_mean": 0.0008094080130831571, "clip_ratio/low_min": 4.0697772419662215e-05, "clip_ratio/region_mean": 0.0018172199779655784, "epoch": 0.5411078717201167, "grad_norm": 0.12129050493240356, "learning_rate": 7.5e-07, "loss": -0.0336, "step": 58 }, { "clip_ratio/high_max": 0.0024718895510886796, "clip_ratio/high_mean": 0.0010803790974023286, "clip_ratio/low_mean": 0.0009299161738454131, "clip_ratio/low_min": 0.00012436679753591307, "clip_ratio/region_mean": 0.002010295291256625, "epoch": 0.5504373177842565, "grad_norm": 0.13225725293159485, "learning_rate": 7.5e-07, "loss": 0.0137, "step": 59 }, { "clip_ratio/high_max": 0.002428885636618361, "clip_ratio/high_mean": 0.0011203889043827076, "clip_ratio/low_mean": 0.000982267780273105, "clip_ratio/low_min": 9.975695320463274e-05, "clip_ratio/region_mean": 0.002102656682836823, "epoch": 0.5597667638483965, "grad_norm": 0.12820260226726532, "learning_rate": 7.5e-07, "loss": -0.0136, "step": 60 }, { "clip_ratio/high_max": 0.00245327837910736, "clip_ratio/high_mean": 0.0011375888716429472, "clip_ratio/low_mean": 0.0010340037260903046, "clip_ratio/low_min": 5.253732524579391e-05, "clip_ratio/region_mean": 0.0021715925686294213, "epoch": 0.5690962099125364, "grad_norm": 0.14021356403827667, "learning_rate": 7.5e-07, "loss": -0.0115, "step": 61 }, { "clip_ratio/high_max": 0.002329655639186967, "clip_ratio/high_mean": 0.000988372399660875, "clip_ratio/low_mean": 0.0010402615553175565, "clip_ratio/low_min": 8.761466688156361e-05, "clip_ratio/region_mean": 0.002028633956797421, "epoch": 0.5784256559766764, "grad_norm": 0.1274394989013672, "learning_rate": 7.5e-07, "loss": -0.0051, "step": 62 }, { "clip_ratio/high_max": 0.0026647734994185157, "clip_ratio/high_mean": 0.001089242818125058, "clip_ratio/low_mean": 0.0011810681171482429, "clip_ratio/low_min": 0.00017129157822637353, "clip_ratio/region_mean": 0.0022703109061694704, "epoch": 0.5877551020408164, "grad_norm": 0.12593336403369904, "learning_rate": 7.5e-07, "loss": 0.04, "step": 63 }, { "clip_ratio/high_max": 0.0026520584215177223, "clip_ratio/high_mean": 0.0011334529845044017, "clip_ratio/low_mean": 0.0011385327998141292, "clip_ratio/low_min": 0.0001238934328284813, "clip_ratio/region_mean": 0.0022719857952324674, "epoch": 0.5970845481049563, "grad_norm": 0.12692873179912567, "learning_rate": 7.5e-07, "loss": -0.0132, "step": 64 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.017299107142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 630.8339233398438, "completions/mean_terminated_length": 569.8344116210938, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 0.6064139941690962, "grad_norm": 0.1351737678050995, "learning_rate": 7.5e-07, "loss": 0.0375, "num_tokens": 46552261.0, "reward": 0.5427595376968384, "reward_std": 0.22168387472629547, "rewards/simpleverify_reward/mean": 0.5427594780921936, "rewards/simpleverify_reward/std": 0.4981856644153595, "step": 65 }, { "clip_ratio/high_max": 0.0016777687887952197, "clip_ratio/high_mean": 0.000660038258502027, "clip_ratio/low_mean": 0.0006890730383020127, "clip_ratio/low_min": 5.0007231038762257e-05, "clip_ratio/region_mean": 0.0013491112913470715, "epoch": 0.6157434402332361, "grad_norm": 0.11591223627328873, "learning_rate": 7.5e-07, "loss": 0.0643, "step": 66 }, { "clip_ratio/high_max": 0.002224428953923052, "clip_ratio/high_mean": 0.0009810924475459615, "clip_ratio/low_mean": 0.0006771427360945381, "clip_ratio/low_min": 1.6104097085190006e-05, "clip_ratio/region_mean": 0.0016582351963734254, "epoch": 0.6250728862973761, "grad_norm": 0.1309317797422409, "learning_rate": 7.5e-07, "loss": 0.0204, "step": 67 }, { "clip_ratio/high_max": 0.002192222011217382, "clip_ratio/high_mean": 0.0009547087756800465, "clip_ratio/low_mean": 0.0006482307107944507, "clip_ratio/low_min": 3.9251761336345226e-05, "clip_ratio/region_mean": 0.001602939453732688, "epoch": 0.634402332361516, "grad_norm": 0.11639600992202759, "learning_rate": 7.5e-07, "loss": -0.0091, "step": 68 }, { "clip_ratio/high_max": 0.002497686553397216, "clip_ratio/high_mean": 0.001005971946142381, "clip_ratio/low_mean": 0.0006685115167783806, "clip_ratio/low_min": 2.1724017642554827e-05, "clip_ratio/region_mean": 0.0016744834720157087, "epoch": 0.643731778425656, "grad_norm": 0.12290813773870468, "learning_rate": 7.5e-07, "loss": 0.0354, "step": 69 }, { "clip_ratio/high_max": 0.002126446233887691, "clip_ratio/high_mean": 0.000909708618564764, "clip_ratio/low_mean": 0.0006319246149359969, "clip_ratio/low_min": 1.5443538359249942e-05, "clip_ratio/region_mean": 0.0015416332462336868, "epoch": 0.6530612244897959, "grad_norm": 0.12035003304481506, "learning_rate": 7.5e-07, "loss": -0.0218, "step": 70 }, { "clip_ratio/high_max": 0.0021898312988923863, "clip_ratio/high_mean": 0.001000202753857593, "clip_ratio/low_mean": 0.0006587938587472308, "clip_ratio/low_min": 5.064456036052434e-05, "clip_ratio/region_mean": 0.001658996639889665, "epoch": 0.6623906705539359, "grad_norm": 0.12516234815120697, "learning_rate": 7.5e-07, "loss": -0.0196, "step": 71 }, { "clip_ratio/high_max": 0.0021835834595549386, "clip_ratio/high_mean": 0.0009890626515698386, "clip_ratio/low_mean": 0.0008387410780414939, "clip_ratio/low_min": 4.445860304258531e-05, "clip_ratio/region_mean": 0.0018278037896379828, "epoch": 0.6717201166180758, "grad_norm": 0.12776419520378113, "learning_rate": 7.5e-07, "loss": 0.0109, "step": 72 }, { "clip_ratio/high_max": 0.0023379055928671733, "clip_ratio/high_mean": 0.001034852222801419, "clip_ratio/low_mean": 0.0009142067210632376, "clip_ratio/low_min": 0.00012901226546091493, "clip_ratio/region_mean": 0.0019490589038468897, "epoch": 0.6810495626822157, "grad_norm": 0.12549160420894623, "learning_rate": 7.5e-07, "loss": 0.0168, "step": 73 }, { "clip_ratio/high_max": 0.0024560683086747304, "clip_ratio/high_mean": 0.0010135426364286104, "clip_ratio/low_mean": 0.0008368097951461095, "clip_ratio/low_min": 8.20748809928773e-05, "clip_ratio/region_mean": 0.0018503523751860484, "epoch": 0.6903790087463557, "grad_norm": 0.12960796058177948, "learning_rate": 7.5e-07, "loss": -0.025, "step": 74 }, { "clip_ratio/high_max": 0.0023467284700018354, "clip_ratio/high_mean": 0.0010466763378644828, "clip_ratio/low_mean": 0.0009183542597384076, "clip_ratio/low_min": 0.0001633593556107371, "clip_ratio/region_mean": 0.0019650305912364274, "epoch": 0.6997084548104956, "grad_norm": 0.13185515999794006, "learning_rate": 7.5e-07, "loss": -0.0055, "step": 75 }, { "clip_ratio/high_max": 0.0021428786967589986, "clip_ratio/high_mean": 0.0009041770863404963, "clip_ratio/low_mean": 0.0008837801706249593, "clip_ratio/low_min": 0.00012321544909354998, "clip_ratio/region_mean": 0.001787957276974339, "epoch": 0.7090379008746356, "grad_norm": 0.11031434684991837, "learning_rate": 7.5e-07, "loss": 0.0071, "step": 76 }, { "clip_ratio/high_max": 0.0019293301957077347, "clip_ratio/high_mean": 0.0008862379290803801, "clip_ratio/low_mean": 0.0010444114122947212, "clip_ratio/low_min": 0.00013423870586848352, "clip_ratio/region_mean": 0.0019306493413751014, "epoch": 0.7183673469387755, "grad_norm": 0.11370403319597244, "learning_rate": 7.5e-07, "loss": 0.0213, "step": 77 }, { "clip_ratio/high_max": 0.002237447686638916, "clip_ratio/high_mean": 0.0010017671120294835, "clip_ratio/low_mean": 0.0009586755204509245, "clip_ratio/low_min": 0.00012437514214980183, "clip_ratio/region_mean": 0.0019604426197474822, "epoch": 0.7276967930029155, "grad_norm": 0.12025897204875946, "learning_rate": 7.5e-07, "loss": -0.0128, "step": 78 }, { "clip_ratio/high_max": 0.0020930780010530725, "clip_ratio/high_mean": 0.0009441870315640699, "clip_ratio/low_mean": 0.0009030957262439188, "clip_ratio/low_min": 5.362000683817314e-05, "clip_ratio/region_mean": 0.0018472827578079887, "epoch": 0.7370262390670554, "grad_norm": 0.1161620020866394, "learning_rate": 7.5e-07, "loss": -0.0215, "step": 79 }, { "clip_ratio/high_max": 0.001905592012917623, "clip_ratio/high_mean": 0.0008682740481162909, "clip_ratio/low_mean": 0.00085708239566884, "clip_ratio/low_min": 9.291635342378868e-05, "clip_ratio/region_mean": 0.0017253564146813005, "epoch": 0.7463556851311953, "grad_norm": 0.1159445270895958, "learning_rate": 7.5e-07, "loss": 0.0004, "step": 80 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.013950892857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 618.1241455078125, "completions/mean_terminated_length": 568.9181518554688, "completions/min_length": 7.0, "completions/min_terminated_length": 7.0, "epoch": 0.7556851311953353, "grad_norm": 0.1272365003824234, "learning_rate": 7.5e-07, "loss": 0.0205, "num_tokens": 56012312.0, "reward": 0.5681501626968384, "reward_std": 0.20913077890872955, "rewards/simpleverify_reward/mean": 0.5681501030921936, "rewards/simpleverify_reward/std": 0.49535107612609863, "step": 81 }, { "clip_ratio/high_max": 0.0017669835360720754, "clip_ratio/high_mean": 0.0007644509969395585, "clip_ratio/low_mean": 0.0005627893642667914, "clip_ratio/low_min": 1.5031265320430975e-05, "clip_ratio/region_mean": 0.001327240370301297, "epoch": 0.7650145772594752, "grad_norm": 0.12344862520694733, "learning_rate": 7.5e-07, "loss": 0.0521, "step": 82 }, { "clip_ratio/high_max": 0.001814390197978355, "clip_ratio/high_mean": 0.0007479656769646681, "clip_ratio/low_mean": 0.0006519025464513106, "clip_ratio/low_min": 2.763602606137283e-05, "clip_ratio/region_mean": 0.0013998682261444628, "epoch": 0.7743440233236152, "grad_norm": 0.11796759814023972, "learning_rate": 7.5e-07, "loss": 0.0405, "step": 83 }, { "clip_ratio/high_max": 0.0020210069124004804, "clip_ratio/high_mean": 0.0008626311519037699, "clip_ratio/low_mean": 0.0005278277876641368, "clip_ratio/low_min": 4.015978538518539e-05, "clip_ratio/region_mean": 0.0013904589759476949, "epoch": 0.7836734693877551, "grad_norm": 0.12199980765581131, "learning_rate": 7.5e-07, "loss": 0.0145, "step": 84 }, { "clip_ratio/high_max": 0.0017789663907024078, "clip_ratio/high_mean": 0.0007441508114425233, "clip_ratio/low_mean": 0.0006399189551302698, "clip_ratio/low_min": 3.4103873986168765e-05, "clip_ratio/region_mean": 0.0013840697647538036, "epoch": 0.793002915451895, "grad_norm": 0.12758439779281616, "learning_rate": 7.5e-07, "loss": 0.0211, "step": 85 }, { "clip_ratio/high_max": 0.002079372279695235, "clip_ratio/high_mean": 0.0008589842182118446, "clip_ratio/low_mean": 0.0006575595580216032, "clip_ratio/low_min": 1.85570079338504e-05, "clip_ratio/region_mean": 0.0015165437580435537, "epoch": 0.8023323615160349, "grad_norm": 0.12577228248119354, "learning_rate": 7.5e-07, "loss": 0.0333, "step": 86 }, { "clip_ratio/high_max": 0.0023031626842566766, "clip_ratio/high_mean": 0.0009260037968488177, "clip_ratio/low_mean": 0.0006527350706164725, "clip_ratio/low_min": 6.757568735338282e-05, "clip_ratio/region_mean": 0.0015787388329044916, "epoch": 0.8116618075801749, "grad_norm": 0.13118503987789154, "learning_rate": 7.5e-07, "loss": 0.0197, "step": 87 }, { "clip_ratio/high_max": 0.002223630712251179, "clip_ratio/high_mean": 0.0008451027442788472, "clip_ratio/low_mean": 0.0008140555983118247, "clip_ratio/low_min": 8.488829462294234e-05, "clip_ratio/region_mean": 0.0016591583080298733, "epoch": 0.8209912536443149, "grad_norm": 0.13057321310043335, "learning_rate": 7.5e-07, "loss": 0.0153, "step": 88 }, { "clip_ratio/high_max": 0.0024345878264284693, "clip_ratio/high_mean": 0.0010499587042431813, "clip_ratio/low_mean": 0.0007003118043940049, "clip_ratio/low_min": 4.143188562011346e-05, "clip_ratio/region_mean": 0.001750270530465059, "epoch": 0.8303206997084548, "grad_norm": 0.12716466188430786, "learning_rate": 7.5e-07, "loss": -0.0303, "step": 89 }, { "clip_ratio/high_max": 0.0023987409949768335, "clip_ratio/high_mean": 0.0008666641097079264, "clip_ratio/low_mean": 0.0007330823764277739, "clip_ratio/low_min": 8.073590197454905e-05, "clip_ratio/region_mean": 0.0015997464797692373, "epoch": 0.8396501457725948, "grad_norm": 0.11441989988088608, "learning_rate": 7.5e-07, "loss": 0.0025, "step": 90 }, { "clip_ratio/high_max": 0.002107116008119192, "clip_ratio/high_mean": 0.0008217019294534111, "clip_ratio/low_mean": 0.0008972172599897021, "clip_ratio/low_min": 1.5919511497486383e-05, "clip_ratio/region_mean": 0.001718919214908965, "epoch": 0.8489795918367347, "grad_norm": 0.10743547230958939, "learning_rate": 7.5e-07, "loss": 0.0388, "step": 91 }, { "clip_ratio/high_max": 0.002067408917355351, "clip_ratio/high_mean": 0.0009250521070498507, "clip_ratio/low_mean": 0.0007779064835631289, "clip_ratio/low_min": 2.861721623048652e-05, "clip_ratio/region_mean": 0.001702958601526916, "epoch": 0.8583090379008746, "grad_norm": 0.12482812255620956, "learning_rate": 7.5e-07, "loss": -0.0061, "step": 92 }, { "clip_ratio/high_max": 0.0022695706138620153, "clip_ratio/high_mean": 0.0009532248150208034, "clip_ratio/low_mean": 0.0008908949330361793, "clip_ratio/low_min": 4.355854161985917e-05, "clip_ratio/region_mean": 0.0018441197025822476, "epoch": 0.8676384839650145, "grad_norm": 0.11792949587106705, "learning_rate": 7.5e-07, "loss": -0.0018, "step": 93 }, { "clip_ratio/high_max": 0.0024329711523023434, "clip_ratio/high_mean": 0.0009809769544517621, "clip_ratio/low_mean": 0.0008491279158988618, "clip_ratio/low_min": 9.866932850854937e-05, "clip_ratio/region_mean": 0.001830104854889214, "epoch": 0.8769679300291545, "grad_norm": 0.10169944912195206, "learning_rate": 7.5e-07, "loss": -0.0339, "step": 94 }, { "clip_ratio/high_max": 0.002163531524274731, "clip_ratio/high_mean": 0.0009895138446154306, "clip_ratio/low_mean": 0.0009904103571898304, "clip_ratio/low_min": 2.4361723262700252e-05, "clip_ratio/region_mean": 0.001979924229090102, "epoch": 0.8862973760932945, "grad_norm": 0.12128250300884247, "learning_rate": 7.5e-07, "loss": -0.0236, "step": 95 }, { "clip_ratio/high_max": 0.0019322699554322753, "clip_ratio/high_mean": 0.0009400359485880472, "clip_ratio/low_mean": 0.0008860779489623383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018261139121023007, "epoch": 0.8956268221574344, "grad_norm": 0.11831124871969223, "learning_rate": 7.5e-07, "loss": 0.0081, "step": 96 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015345982142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 616.0974731445312, "completions/mean_terminated_length": 561.8626098632812, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 1.00932944606414, "grad_norm": 0.12217328697443008, "learning_rate": 7.5e-07, "loss": 0.0592, "num_tokens": 65395813.0, "reward": 0.5593611001968384, "reward_std": 0.21416760981082916, "rewards/simpleverify_reward/mean": 0.5593610405921936, "rewards/simpleverify_reward/std": 0.49648112058639526, "step": 97 }, { "clip_ratio/high_max": 0.0022005592109053396, "clip_ratio/high_mean": 0.0009085512465389911, "clip_ratio/low_mean": 0.0005805279761261772, "clip_ratio/low_min": 1.2271745617908891e-05, "clip_ratio/region_mean": 0.0014890792044752743, "epoch": 1.01865889212828, "grad_norm": 0.12586010992527008, "learning_rate": 7.5e-07, "loss": 0.02, "step": 98 }, { "clip_ratio/high_max": 0.0020927977602696046, "clip_ratio/high_mean": 0.0009819447877816856, "clip_ratio/low_mean": 0.000555441903998144, "clip_ratio/low_min": 1.8865077436203137e-05, "clip_ratio/region_mean": 0.0015373867063317448, "epoch": 1.0279883381924197, "grad_norm": 0.1348189264535904, "learning_rate": 7.5e-07, "loss": -0.0161, "step": 99 }, { "clip_ratio/high_max": 0.0020296824295655824, "clip_ratio/high_mean": 0.0008851591010170523, "clip_ratio/low_mean": 0.0006073368913348531, "clip_ratio/low_min": 4.112943042855477e-05, "clip_ratio/region_mean": 0.001492495990532916, "epoch": 1.0373177842565597, "grad_norm": 0.1277037113904953, "learning_rate": 7.5e-07, "loss": 0.0078, "step": 100 }, { "clip_ratio/high_max": 0.00202625125166378, "clip_ratio/high_mean": 0.0009114883578149602, "clip_ratio/low_mean": 0.0006282588165049674, "clip_ratio/low_min": 4.6373824261536356e-05, "clip_ratio/region_mean": 0.0015397471688629594, "epoch": 1.0466472303206997, "grad_norm": 0.12374728918075562, "learning_rate": 7.5e-07, "loss": 0.0122, "step": 101 }, { "clip_ratio/high_max": 0.002026980626396835, "clip_ratio/high_mean": 0.0008327449304488255, "clip_ratio/low_mean": 0.0006645517387369182, "clip_ratio/low_min": 1.6951451470959e-05, "clip_ratio/region_mean": 0.0014972966309869662, "epoch": 1.0559766763848397, "grad_norm": 0.12344570457935333, "learning_rate": 7.5e-07, "loss": 0.0137, "step": 102 }, { "clip_ratio/high_max": 0.002202790099545382, "clip_ratio/high_mean": 0.0009338469099020585, "clip_ratio/low_mean": 0.0006649131373706041, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001598760049091652, "epoch": 1.0653061224489795, "grad_norm": 0.13729514181613922, "learning_rate": 7.5e-07, "loss": -0.0513, "step": 103 }, { "clip_ratio/high_max": 0.0019263881222286727, "clip_ratio/high_mean": 0.0008496636082782061, "clip_ratio/low_mean": 0.0007613669622514863, "clip_ratio/low_min": 9.548093566991156e-05, "clip_ratio/region_mean": 0.0016110305732581764, "epoch": 1.0746355685131195, "grad_norm": 0.12240830808877945, "learning_rate": 7.5e-07, "loss": 0.0326, "step": 104 }, { "clip_ratio/high_max": 0.0018776849210553337, "clip_ratio/high_mean": 0.0008927987473725807, "clip_ratio/low_mean": 0.000769267266150564, "clip_ratio/low_min": 5.114046871312894e-05, "clip_ratio/region_mean": 0.0016620659807813354, "epoch": 1.0839650145772595, "grad_norm": 0.11665531992912292, "learning_rate": 7.5e-07, "loss": 0.0203, "step": 105 }, { "clip_ratio/high_max": 0.002249160534120165, "clip_ratio/high_mean": 0.0010263770345773082, "clip_ratio/low_mean": 0.0007986998516571475, "clip_ratio/low_min": 0.00010088506860483903, "clip_ratio/region_mean": 0.0018250768407597207, "epoch": 1.0932944606413995, "grad_norm": 0.12229840457439423, "learning_rate": 7.5e-07, "loss": -0.0215, "step": 106 }, { "clip_ratio/high_max": 0.0020617719419533387, "clip_ratio/high_mean": 0.0009135072632489027, "clip_ratio/low_mean": 0.0008624817992313183, "clip_ratio/low_min": 0.0001113373273256002, "clip_ratio/region_mean": 0.0017759890724846628, "epoch": 1.1026239067055394, "grad_norm": 0.11623378843069077, "learning_rate": 7.5e-07, "loss": -0.0027, "step": 107 }, { "clip_ratio/high_max": 0.002272362540679751, "clip_ratio/high_mean": 0.0009281134443881456, "clip_ratio/low_mean": 0.0008399531925533665, "clip_ratio/low_min": 3.8887255868758075e-05, "clip_ratio/region_mean": 0.0017680666569503956, "epoch": 1.1119533527696792, "grad_norm": 0.11970115453004837, "learning_rate": 7.5e-07, "loss": -0.0079, "step": 108 }, { "clip_ratio/high_max": 0.0022323186421999708, "clip_ratio/high_mean": 0.0009865841420833021, "clip_ratio/low_mean": 0.0009615592953196028, "clip_ratio/low_min": 5.7510926126269624e-05, "clip_ratio/region_mean": 0.0019481434137560427, "epoch": 1.1212827988338192, "grad_norm": 0.11114727705717087, "learning_rate": 7.5e-07, "loss": -0.003, "step": 109 }, { "clip_ratio/high_max": 0.0021998622323735617, "clip_ratio/high_mean": 0.0009083453933271812, "clip_ratio/low_mean": 0.0008950643459684215, "clip_ratio/low_min": 0.00010267798370477976, "clip_ratio/region_mean": 0.0018034097665804438, "epoch": 1.1306122448979592, "grad_norm": 0.12159287184476852, "learning_rate": 7.5e-07, "loss": -0.0119, "step": 110 }, { "clip_ratio/high_max": 0.00216600776911946, "clip_ratio/high_mean": 0.0009142569251707755, "clip_ratio/low_mean": 0.0008858647834131261, "clip_ratio/low_min": 4.2351843148935586e-05, "clip_ratio/region_mean": 0.0018001216885750182, "epoch": 1.1399416909620992, "grad_norm": 0.12913736701011658, "learning_rate": 7.5e-07, "loss": -0.0194, "step": 111 }, { "clip_ratio/high_max": 0.0019988118001492694, "clip_ratio/high_mean": 0.0008152987757057417, "clip_ratio/low_mean": 0.0010842091360245831, "clip_ratio/low_min": 8.812111627776176e-05, "clip_ratio/region_mean": 0.0018995079444721341, "epoch": 1.149271137026239, "grad_norm": 0.1258009672164917, "learning_rate": 7.5e-07, "loss": 0.0876, "step": 112 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.01708984375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4052.0, "completions/mean_length": 617.205078125, "completions/mean_terminated_length": 556.7192993164062, "completions/min_length": 20.0, "completions/min_terminated_length": 20.0, "epoch": 1.158600583090379, "grad_norm": 0.13391870260238647, "learning_rate": 7.5e-07, "loss": 0.018, "num_tokens": 74662353.0, "reward": 0.566964328289032, "reward_std": 0.20685595273971558, "rewards/simpleverify_reward/mean": 0.5669642686843872, "rewards/simpleverify_reward/std": 0.4955127239227295, "step": 113 }, { "clip_ratio/high_max": 0.0023972053750185296, "clip_ratio/high_mean": 0.0010991960552928504, "clip_ratio/low_mean": 0.0005199390716370544, "clip_ratio/low_min": 4.463488585315645e-05, "clip_ratio/region_mean": 0.0016191351496672723, "epoch": 1.167930029154519, "grad_norm": 0.11856097728013992, "learning_rate": 7.5e-07, "loss": -0.0268, "step": 114 }, { "clip_ratio/high_max": 0.0018703161113080569, "clip_ratio/high_mean": 0.0008103438231046312, "clip_ratio/low_mean": 0.0005887207535124617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013990645420562942, "epoch": 1.177259475218659, "grad_norm": 0.12290913611650467, "learning_rate": 7.5e-07, "loss": 0.0306, "step": 115 }, { "clip_ratio/high_max": 0.0019549860080587678, "clip_ratio/high_mean": 0.0008291308486150228, "clip_ratio/low_mean": 0.0006384895896189846, "clip_ratio/low_min": 4.527337659965269e-05, "clip_ratio/region_mean": 0.001467620451876428, "epoch": 1.186588921282799, "grad_norm": 0.12089542299509048, "learning_rate": 7.5e-07, "loss": 0.0456, "step": 116 }, { "clip_ratio/high_max": 0.0019465915465843864, "clip_ratio/high_mean": 0.000805553752798005, "clip_ratio/low_mean": 0.000601567022386007, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014071207697270438, "epoch": 1.1959183673469387, "grad_norm": 0.1292574256658554, "learning_rate": 7.5e-07, "loss": -0.0174, "step": 117 }, { "clip_ratio/high_max": 0.0017876598467410076, "clip_ratio/high_mean": 0.0006879927814225084, "clip_ratio/low_mean": 0.0006083218140702229, "clip_ratio/low_min": 4.2484906771278474e-05, "clip_ratio/region_mean": 0.0012963145854882896, "epoch": 1.2052478134110787, "grad_norm": 0.11605941504240036, "learning_rate": 7.5e-07, "loss": 0.0451, "step": 118 }, { "clip_ratio/high_max": 0.0021194224536884576, "clip_ratio/high_mean": 0.0008986777647805866, "clip_ratio/low_mean": 0.0007619776206411188, "clip_ratio/low_min": 0.00010943616689473856, "clip_ratio/region_mean": 0.001660655365412822, "epoch": 1.2145772594752187, "grad_norm": 0.12844668328762054, "learning_rate": 7.5e-07, "loss": 0.0267, "step": 119 }, { "clip_ratio/high_max": 0.0022224541535251774, "clip_ratio/high_mean": 0.0010149552435905207, "clip_ratio/low_mean": 0.000616474089838448, "clip_ratio/low_min": 1.5996927686501294e-05, "clip_ratio/region_mean": 0.0016314293243340217, "epoch": 1.2239067055393587, "grad_norm": 0.1203896701335907, "learning_rate": 7.5e-07, "loss": -0.004, "step": 120 }, { "clip_ratio/high_max": 0.0023568551332573406, "clip_ratio/high_mean": 0.0009300439105572877, "clip_ratio/low_mean": 0.0006705530522594927, "clip_ratio/low_min": 1.299376344832126e-05, "clip_ratio/region_mean": 0.0016005969664547592, "epoch": 1.2332361516034984, "grad_norm": 0.12271235883235931, "learning_rate": 7.5e-07, "loss": -0.015, "step": 121 }, { "clip_ratio/high_max": 0.0020444852889340837, "clip_ratio/high_mean": 0.0009600585090083769, "clip_ratio/low_mean": 0.0007253164931171341, "clip_ratio/low_min": 3.0502684239763767e-05, "clip_ratio/region_mean": 0.0016853750057634898, "epoch": 1.2425655976676384, "grad_norm": 0.1287597417831421, "learning_rate": 7.5e-07, "loss": -0.0256, "step": 122 }, { "clip_ratio/high_max": 0.0022328028644551523, "clip_ratio/high_mean": 0.0008788074665062595, "clip_ratio/low_mean": 0.0007138810651667882, "clip_ratio/low_min": 9.918209707393544e-05, "clip_ratio/region_mean": 0.0015926884916552808, "epoch": 1.2518950437317784, "grad_norm": 0.12669065594673157, "learning_rate": 7.5e-07, "loss": 0.0022, "step": 123 }, { "clip_ratio/high_max": 0.0023358411344815977, "clip_ratio/high_mean": 0.000928411798668094, "clip_ratio/low_mean": 0.0007985833035490941, "clip_ratio/low_min": 4.9517007028043736e-05, "clip_ratio/region_mean": 0.0017269951131311245, "epoch": 1.2612244897959184, "grad_norm": 0.139856219291687, "learning_rate": 7.5e-07, "loss": -0.0242, "step": 124 }, { "clip_ratio/high_max": 0.0020956173248123378, "clip_ratio/high_mean": 0.0009441572983632796, "clip_ratio/low_mean": 0.0008031585603021085, "clip_ratio/low_min": 4.562866342894267e-05, "clip_ratio/region_mean": 0.0017473158368375152, "epoch": 1.2705539358600584, "grad_norm": 0.11814989149570465, "learning_rate": 7.5e-07, "loss": -0.0156, "step": 125 }, { "clip_ratio/high_max": 0.002463448436174076, "clip_ratio/high_mean": 0.0009347480336145964, "clip_ratio/low_mean": 0.0008168021495293942, "clip_ratio/low_min": 4.4379650717019103e-05, "clip_ratio/region_mean": 0.0017515501895104535, "epoch": 1.2798833819241984, "grad_norm": 0.12216034531593323, "learning_rate": 7.5e-07, "loss": -0.0011, "step": 126 }, { "clip_ratio/high_max": 0.0020190230607113335, "clip_ratio/high_mean": 0.0009114046897593653, "clip_ratio/low_mean": 0.0007733274742349749, "clip_ratio/low_min": 0.00011677988914016169, "clip_ratio/region_mean": 0.0016847321385284886, "epoch": 1.2892128279883381, "grad_norm": 0.12442462891340256, "learning_rate": 7.5e-07, "loss": 0.0043, "step": 127 }, { "clip_ratio/high_max": 0.0020697533909697086, "clip_ratio/high_mean": 0.0008416442105954047, "clip_ratio/low_mean": 0.000993318659311626, "clip_ratio/low_min": 8.537428948329762e-05, "clip_ratio/region_mean": 0.0018349628735450096, "epoch": 1.2985422740524781, "grad_norm": 0.1232854574918747, "learning_rate": 7.5e-07, "loss": 0.0361, "step": 128 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0160435267857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 623.5657958984375, "completions/mean_terminated_length": 566.9473266601562, "completions/min_length": 68.0, "completions/min_terminated_length": 68.0, "epoch": 1.3078717201166181, "grad_norm": 0.12647536396980286, "learning_rate": 7.5e-07, "loss": 0.0031, "num_tokens": 84107624.0, "reward": 0.5672433376312256, "reward_std": 0.19994957745075226, "rewards/simpleverify_reward/mean": 0.5672432780265808, "rewards/simpleverify_reward/std": 0.49547499418258667, "step": 129 }, { "clip_ratio/high_max": 0.0020175133540760726, "clip_ratio/high_mean": 0.0008100773902697256, "clip_ratio/low_mean": 0.000577012816847855, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013870902403141372, "epoch": 1.3172011661807579, "grad_norm": 0.12056925892829895, "learning_rate": 7.5e-07, "loss": 0.0085, "step": 130 }, { "clip_ratio/high_max": 0.0019477479690976907, "clip_ratio/high_mean": 0.0008166130464815069, "clip_ratio/low_mean": 0.0005637681651933235, "clip_ratio/low_min": 8.080927182163578e-05, "clip_ratio/region_mean": 0.0013803812362311874, "epoch": 1.3265306122448979, "grad_norm": 0.11883944272994995, "learning_rate": 7.5e-07, "loss": -0.0011, "step": 131 }, { "clip_ratio/high_max": 0.001989130243600812, "clip_ratio/high_mean": 0.0008246038032666547, "clip_ratio/low_mean": 0.0006250386250030715, "clip_ratio/low_min": 3.685855790536152e-05, "clip_ratio/region_mean": 0.0014496424264507368, "epoch": 1.3358600583090379, "grad_norm": 0.13418690860271454, "learning_rate": 7.5e-07, "loss": 0.0154, "step": 132 }, { "clip_ratio/high_max": 0.002092211572744418, "clip_ratio/high_mean": 0.0009038567368406802, "clip_ratio/low_mean": 0.0007229901511891512, "clip_ratio/low_min": 6.433190901589114e-05, "clip_ratio/region_mean": 0.0016268468752969056, "epoch": 1.3451895043731779, "grad_norm": 0.11540927737951279, "learning_rate": 7.5e-07, "loss": -0.0132, "step": 133 }, { "clip_ratio/high_max": 0.0017707024890114553, "clip_ratio/high_mean": 0.0008328885596711189, "clip_ratio/low_mean": 0.0006903056255396223, "clip_ratio/low_min": 1.4702422959089745e-05, "clip_ratio/region_mean": 0.0015231941943056881, "epoch": 1.3545189504373178, "grad_norm": 0.115590900182724, "learning_rate": 7.5e-07, "loss": 0.0353, "step": 134 }, { "clip_ratio/high_max": 0.002544262111769058, "clip_ratio/high_mean": 0.0010405802713648882, "clip_ratio/low_mean": 0.0006443331521950313, "clip_ratio/low_min": 5.714796770917019e-05, "clip_ratio/region_mean": 0.0016849134262884036, "epoch": 1.3638483965014578, "grad_norm": 0.11943043768405914, "learning_rate": 7.5e-07, "loss": -0.0205, "step": 135 }, { "clip_ratio/high_max": 0.0019668045388243627, "clip_ratio/high_mean": 0.0009092120690183947, "clip_ratio/low_mean": 0.0006966495557207963, "clip_ratio/low_min": 4.149826600041706e-05, "clip_ratio/region_mean": 0.0016058616165537387, "epoch": 1.3731778425655976, "grad_norm": 0.12349311262369156, "learning_rate": 7.5e-07, "loss": 0.0103, "step": 136 }, { "clip_ratio/high_max": 0.0021243313822196797, "clip_ratio/high_mean": 0.00083166493641329, "clip_ratio/low_mean": 0.0007570973757538013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015887622939771973, "epoch": 1.3825072886297376, "grad_norm": 0.1201181709766388, "learning_rate": 7.5e-07, "loss": 0.0224, "step": 137 }, { "clip_ratio/high_max": 0.0020075863030797336, "clip_ratio/high_mean": 0.0009398261372552952, "clip_ratio/low_mean": 0.0008426688473264221, "clip_ratio/low_min": 5.167578910914017e-05, "clip_ratio/region_mean": 0.0017824949318310246, "epoch": 1.3918367346938776, "grad_norm": 0.119373619556427, "learning_rate": 7.5e-07, "loss": 0.0039, "step": 138 }, { "clip_ratio/high_max": 0.0022447511582868174, "clip_ratio/high_mean": 0.0008620258540759096, "clip_ratio/low_mean": 0.000835596802062355, "clip_ratio/low_min": 6.103937721491093e-05, "clip_ratio/region_mean": 0.00169762263976736, "epoch": 1.4011661807580174, "grad_norm": 0.12616772949695587, "learning_rate": 7.5e-07, "loss": 0.02, "step": 139 }, { "clip_ratio/high_max": 0.00233949747052975, "clip_ratio/high_mean": 0.0009486588260188, "clip_ratio/low_mean": 0.0007216421417979291, "clip_ratio/low_min": 4.095996337127872e-05, "clip_ratio/region_mean": 0.0016703009459888563, "epoch": 1.4104956268221573, "grad_norm": 0.12103062123060226, "learning_rate": 7.5e-07, "loss": 0.0013, "step": 140 }, { "clip_ratio/high_max": 0.002368454232055228, "clip_ratio/high_mean": 0.0010277663604938425, "clip_ratio/low_mean": 0.0007415259278786834, "clip_ratio/low_min": 7.506656947953161e-05, "clip_ratio/region_mean": 0.0017692922847345471, "epoch": 1.4198250728862973, "grad_norm": 0.12630027532577515, "learning_rate": 7.5e-07, "loss": -0.0332, "step": 141 }, { "clip_ratio/high_max": 0.0018229670968139544, "clip_ratio/high_mean": 0.0008043452180572785, "clip_ratio/low_mean": 0.0009829368136706762, "clip_ratio/low_min": 4.840017300011823e-05, "clip_ratio/region_mean": 0.0017872820171760395, "epoch": 1.4291545189504373, "grad_norm": 0.12614338099956512, "learning_rate": 7.5e-07, "loss": 0.047, "step": 142 }, { "clip_ratio/high_max": 0.001768222118698759, "clip_ratio/high_mean": 0.0007641743441126891, "clip_ratio/low_mean": 0.000805931847935426, "clip_ratio/low_min": 3.819815538008697e-05, "clip_ratio/region_mean": 0.0015701061784056947, "epoch": 1.4384839650145773, "grad_norm": 0.12201427668333054, "learning_rate": 7.5e-07, "loss": 0.0072, "step": 143 }, { "clip_ratio/high_max": 0.002185724093578756, "clip_ratio/high_mean": 0.0009586429405317176, "clip_ratio/low_mean": 0.0009757671614352148, "clip_ratio/low_min": 0.00010738170749391429, "clip_ratio/region_mean": 0.0019344101019669324, "epoch": 1.4478134110787173, "grad_norm": 0.12821567058563232, "learning_rate": 7.5e-07, "loss": 0.0128, "step": 144 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.015625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 628.0765991210938, "completions/mean_terminated_length": 573.0302124023438, "completions/min_length": 11.0, "completions/min_terminated_length": 11.0, "epoch": 1.457142857142857, "grad_norm": 0.11956070363521576, "learning_rate": 7.5e-07, "loss": 0.0381, "num_tokens": 93628226.0, "reward": 0.5627092719078064, "reward_std": 0.19895125925540924, "rewards/simpleverify_reward/mean": 0.5627092719078064, "rewards/simpleverify_reward/std": 0.4960692524909973, "step": 145 }, { "clip_ratio/high_max": 0.0020696779174613766, "clip_ratio/high_mean": 0.0008162263129634084, "clip_ratio/low_mean": 0.0005664950112986844, "clip_ratio/low_min": 1.6622339899186045e-05, "clip_ratio/region_mean": 0.0013827213224431034, "epoch": 1.466472303206997, "grad_norm": 0.11350875347852707, "learning_rate": 7.5e-07, "loss": -0.0088, "step": 146 }, { "clip_ratio/high_max": 0.002012738106714096, "clip_ratio/high_mean": 0.0007683588773943484, "clip_ratio/low_mean": 0.000658743974781828, "clip_ratio/low_min": 6.307924195425585e-05, "clip_ratio/region_mean": 0.0014271028194343671, "epoch": 1.475801749271137, "grad_norm": 0.12523815035820007, "learning_rate": 7.5e-07, "loss": 0.052, "step": 147 }, { "clip_ratio/high_max": 0.002115386352670612, "clip_ratio/high_mean": 0.0009058340692718048, "clip_ratio/low_mean": 0.0006192535183799919, "clip_ratio/low_min": 1.0735142495832406e-05, "clip_ratio/region_mean": 0.0015250875949277543, "epoch": 1.485131195335277, "grad_norm": 0.12435288727283478, "learning_rate": 7.5e-07, "loss": -0.0182, "step": 148 }, { "clip_ratio/high_max": 0.0020622311931219883, "clip_ratio/high_mean": 0.0008732571350265061, "clip_ratio/low_mean": 0.0004832663216802757, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013565234548877925, "epoch": 1.4944606413994168, "grad_norm": 0.11706077307462692, "learning_rate": 7.5e-07, "loss": -0.0464, "step": 149 }, { "clip_ratio/high_max": 0.0019668005625135265, "clip_ratio/high_mean": 0.0007772586941428017, "clip_ratio/low_mean": 0.0005586462893916178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013359050099097658, "epoch": 1.5037900874635568, "grad_norm": 0.11060047149658203, "learning_rate": 7.5e-07, "loss": 0.0092, "step": 150 }, { "clip_ratio/high_max": 0.0020249501612852328, "clip_ratio/high_mean": 0.0008333934474649141, "clip_ratio/low_mean": 0.0006169938305902178, "clip_ratio/low_min": 7.80336004027049e-05, "clip_ratio/region_mean": 0.0014503872662317008, "epoch": 1.5131195335276968, "grad_norm": 0.12657620012760162, "learning_rate": 7.5e-07, "loss": -0.0083, "step": 151 }, { "clip_ratio/high_max": 0.001580593660037266, "clip_ratio/high_mean": 0.0006107994568083086, "clip_ratio/low_mean": 0.0007512384527217364, "clip_ratio/low_min": 4.339900624472648e-05, "clip_ratio/region_mean": 0.0013620379140775185, "epoch": 1.5224489795918368, "grad_norm": 0.11510039120912552, "learning_rate": 7.5e-07, "loss": 0.0529, "step": 152 }, { "clip_ratio/high_max": 0.0019918264406442177, "clip_ratio/high_mean": 0.0009660637624619994, "clip_ratio/low_mean": 0.0005395755542849656, "clip_ratio/low_min": 1.4768431356060319e-05, "clip_ratio/region_mean": 0.0015056393021950498, "epoch": 1.5317784256559768, "grad_norm": 0.11706139892339706, "learning_rate": 7.5e-07, "loss": -0.0699, "step": 153 }, { "clip_ratio/high_max": 0.0021129134111106396, "clip_ratio/high_mean": 0.0008216795358748641, "clip_ratio/low_mean": 0.0007630921518284595, "clip_ratio/low_min": 0.00010577414559520548, "clip_ratio/region_mean": 0.0015847716967982706, "epoch": 1.5411078717201168, "grad_norm": 0.13050329685211182, "learning_rate": 7.5e-07, "loss": -0.0067, "step": 154 }, { "clip_ratio/high_max": 0.0017787644137570169, "clip_ratio/high_mean": 0.0008362176813534461, "clip_ratio/low_mean": 0.0006953346619411604, "clip_ratio/low_min": 5.726492872781819e-05, "clip_ratio/region_mean": 0.0015315523705794476, "epoch": 1.5504373177842565, "grad_norm": 0.11576955020427704, "learning_rate": 7.5e-07, "loss": 0.0332, "step": 155 }, { "clip_ratio/high_max": 0.002132718749635387, "clip_ratio/high_mean": 0.0008108006259135436, "clip_ratio/low_mean": 0.0007700865608057939, "clip_ratio/low_min": 6.15811859461246e-05, "clip_ratio/region_mean": 0.0015808871903573163, "epoch": 1.5597667638483965, "grad_norm": 0.12015880644321442, "learning_rate": 7.5e-07, "loss": 0.0176, "step": 156 }, { "clip_ratio/high_max": 0.0018037269910564646, "clip_ratio/high_mean": 0.0008251523831859231, "clip_ratio/low_mean": 0.0007426233278238215, "clip_ratio/low_min": 5.606597915175371e-05, "clip_ratio/region_mean": 0.0015677757437515538, "epoch": 1.5690962099125363, "grad_norm": 0.10694063454866409, "learning_rate": 7.5e-07, "loss": -0.0025, "step": 157 }, { "clip_ratio/high_max": 0.0021475529501913115, "clip_ratio/high_mean": 0.0009365361438540276, "clip_ratio/low_mean": 0.0007907998533482896, "clip_ratio/low_min": 0.00012127634909120388, "clip_ratio/region_mean": 0.0017273360135732219, "epoch": 1.5784256559766763, "grad_norm": 0.12961988151073456, "learning_rate": 7.5e-07, "loss": -0.0084, "step": 158 }, { "clip_ratio/high_max": 0.002050243674602825, "clip_ratio/high_mean": 0.0009834486700128764, "clip_ratio/low_mean": 0.0008994439231173601, "clip_ratio/low_min": 1.7538935935590416e-05, "clip_ratio/region_mean": 0.0018828926331480034, "epoch": 1.5877551020408163, "grad_norm": 0.12160208821296692, "learning_rate": 7.5e-07, "loss": 0.0045, "step": 159 }, { "clip_ratio/high_max": 0.002326614412595518, "clip_ratio/high_mean": 0.0008785558929957915, "clip_ratio/low_mean": 0.0010196602434007218, "clip_ratio/low_min": 0.00012989703463972546, "clip_ratio/region_mean": 0.0018982161418534815, "epoch": 1.5970845481049563, "grad_norm": 0.12828201055526733, "learning_rate": 7.5e-07, "loss": 0.029, "step": 160 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.018415178571428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 628.1151123046875, "completions/mean_terminated_length": 563.0552978515625, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 1.6064139941690962, "grad_norm": 0.1461038738489151, "learning_rate": 7.5e-07, "loss": 0.0001, "num_tokens": 102997236.0, "reward": 0.57177734375, "reward_std": 0.2038561999797821, "rewards/simpleverify_reward/mean": 0.57177734375, "rewards/simpleverify_reward/std": 0.4948384463787079, "step": 161 }, { "clip_ratio/high_max": 0.0019364229374332353, "clip_ratio/high_mean": 0.0008477495466649998, "clip_ratio/low_mean": 0.000585282778047258, "clip_ratio/low_min": 2.6205866561213043e-05, "clip_ratio/region_mean": 0.0014330323465401307, "epoch": 1.6157434402332362, "grad_norm": 0.12198937684297562, "learning_rate": 7.5e-07, "loss": 0.005, "step": 162 }, { "clip_ratio/high_max": 0.0023268371078302152, "clip_ratio/high_mean": 0.0009887589621939696, "clip_ratio/low_mean": 0.0006057567934476538, "clip_ratio/low_min": 5.1088094551232643e-05, "clip_ratio/region_mean": 0.0015945157574606128, "epoch": 1.6250728862973762, "grad_norm": 0.12645390629768372, "learning_rate": 7.5e-07, "loss": 0.0159, "step": 163 }, { "clip_ratio/high_max": 0.002106963402184192, "clip_ratio/high_mean": 0.000859100464367657, "clip_ratio/low_mean": 0.0006152974819997326, "clip_ratio/low_min": 5.588607382378541e-05, "clip_ratio/region_mean": 0.001474397948186379, "epoch": 1.634402332361516, "grad_norm": 0.12707805633544922, "learning_rate": 7.5e-07, "loss": 0.0167, "step": 164 }, { "clip_ratio/high_max": 0.0021392330818343908, "clip_ratio/high_mean": 0.0009260790520784212, "clip_ratio/low_mean": 0.0006243256138986908, "clip_ratio/low_min": 2.6266164240951184e-05, "clip_ratio/region_mean": 0.0015504046859859955, "epoch": 1.643731778425656, "grad_norm": 0.1210993081331253, "learning_rate": 7.5e-07, "loss": -0.0477, "step": 165 }, { "clip_ratio/high_max": 0.0022558239070349373, "clip_ratio/high_mean": 0.0009459783559577772, "clip_ratio/low_mean": 0.0005183206631045323, "clip_ratio/low_min": 1.4737090168637224e-05, "clip_ratio/region_mean": 0.0014642990718130022, "epoch": 1.6530612244897958, "grad_norm": 0.12436870485544205, "learning_rate": 7.5e-07, "loss": -0.0101, "step": 166 }, { "clip_ratio/high_max": 0.002131937086232938, "clip_ratio/high_mean": 0.00090270359578426, "clip_ratio/low_mean": 0.0005750532218371518, "clip_ratio/low_min": 1.70068033185089e-05, "clip_ratio/region_mean": 0.001477756832173327, "epoch": 1.6623906705539357, "grad_norm": 0.1220717579126358, "learning_rate": 7.5e-07, "loss": 0.0077, "step": 167 }, { "clip_ratio/high_max": 0.0018288478277099784, "clip_ratio/high_mean": 0.0007573029752165894, "clip_ratio/low_mean": 0.0006933232507435605, "clip_ratio/low_min": 6.90693505021045e-05, "clip_ratio/region_mean": 0.001450626237783581, "epoch": 1.6717201166180757, "grad_norm": 0.12188774347305298, "learning_rate": 7.5e-07, "loss": 0.0062, "step": 168 }, { "clip_ratio/high_max": 0.002011058466450777, "clip_ratio/high_mean": 0.0008676960969751235, "clip_ratio/low_mean": 0.0007043758951112977, "clip_ratio/low_min": 9.587649037712254e-05, "clip_ratio/region_mean": 0.0015720719893579371, "epoch": 1.6810495626822157, "grad_norm": 0.11680170893669128, "learning_rate": 7.5e-07, "loss": -0.0034, "step": 169 }, { "clip_ratio/high_max": 0.002048047746939119, "clip_ratio/high_mean": 0.0008601393155913684, "clip_ratio/low_mean": 0.0007822606130503118, "clip_ratio/low_min": 5.0582852054503746e-05, "clip_ratio/region_mean": 0.0016423998858954292, "epoch": 1.6903790087463557, "grad_norm": 0.1212146058678627, "learning_rate": 7.5e-07, "loss": -0.005, "step": 170 }, { "clip_ratio/high_max": 0.001902346899441909, "clip_ratio/high_mean": 0.0007858897879486904, "clip_ratio/low_mean": 0.0007532682229793863, "clip_ratio/low_min": 2.7568636141950265e-05, "clip_ratio/region_mean": 0.0015391579800052568, "epoch": 1.6997084548104957, "grad_norm": 0.11088095605373383, "learning_rate": 7.5e-07, "loss": 0.0158, "step": 171 }, { "clip_ratio/high_max": 0.0020730474643642083, "clip_ratio/high_mean": 0.0008482657813146943, "clip_ratio/low_mean": 0.0008756616007303819, "clip_ratio/low_min": 5.759406303695869e-05, "clip_ratio/region_mean": 0.0017239273365703411, "epoch": 1.7090379008746357, "grad_norm": 0.11329641938209534, "learning_rate": 7.5e-07, "loss": 0.0163, "step": 172 }, { "clip_ratio/high_max": 0.002154852896637749, "clip_ratio/high_mean": 0.0008934329052863177, "clip_ratio/low_mean": 0.0007543512638221728, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001647784149099607, "epoch": 1.7183673469387755, "grad_norm": 0.11767783015966415, "learning_rate": 7.5e-07, "loss": -0.0371, "step": 173 }, { "clip_ratio/high_max": 0.001952467267983593, "clip_ratio/high_mean": 0.0008813445128907915, "clip_ratio/low_mean": 0.001021602136461297, "clip_ratio/low_min": 0.00015199457629933022, "clip_ratio/region_mean": 0.0019029466202482581, "epoch": 1.7276967930029155, "grad_norm": 0.12130790203809738, "learning_rate": 7.5e-07, "loss": 0.0432, "step": 174 }, { "clip_ratio/high_max": 0.0022461998159997165, "clip_ratio/high_mean": 0.0009345910584670492, "clip_ratio/low_mean": 0.0008050592750805663, "clip_ratio/low_min": 1.9154153051204048e-05, "clip_ratio/region_mean": 0.0017396503171767108, "epoch": 1.7370262390670554, "grad_norm": 0.1344863474369049, "learning_rate": 7.5e-07, "loss": 0.0166, "step": 175 }, { "clip_ratio/high_max": 0.0019671327027026564, "clip_ratio/high_mean": 0.0008171898953150958, "clip_ratio/low_mean": 0.000939270048547769, "clip_ratio/low_min": 1.345243254036177e-05, "clip_ratio/region_mean": 0.0017564599475008436, "epoch": 1.7463556851311952, "grad_norm": 0.1243777722120285, "learning_rate": 7.5e-07, "loss": 0.0162, "step": 176 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0215541294642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 3948.0, "completions/mean_length": 651.179931640625, "completions/mean_terminated_length": 575.294189453125, "completions/min_length": 2.0, "completions/min_terminated_length": 2.0, "epoch": 1.7556851311953352, "grad_norm": 0.1333264708518982, "learning_rate": 7.5e-07, "loss": -0.0215, "num_tokens": 112479375.0, "reward": 0.565011203289032, "reward_std": 0.1998385637998581, "rewards/simpleverify_reward/mean": 0.5650111436843872, "rewards/simpleverify_reward/std": 0.4957728087902069, "step": 177 }, { "clip_ratio/high_max": 0.0019882938140654005, "clip_ratio/high_mean": 0.0008359416624443838, "clip_ratio/low_mean": 0.0005691788246622309, "clip_ratio/low_min": 5.041555323259672e-05, "clip_ratio/region_mean": 0.0014051205143914558, "epoch": 1.7650145772594752, "grad_norm": 0.11274171620607376, "learning_rate": 7.5e-07, "loss": -0.0104, "step": 178 }, { "clip_ratio/high_max": 0.002133985053660581, "clip_ratio/high_mean": 0.0009277416156692198, "clip_ratio/low_mean": 0.0006137396558187902, "clip_ratio/low_min": 6.817782013968099e-05, "clip_ratio/region_mean": 0.0015414812642120523, "epoch": 1.7743440233236152, "grad_norm": 0.22126473486423492, "learning_rate": 7.5e-07, "loss": -0.0188, "step": 179 }, { "clip_ratio/high_max": 0.00215448305971222, "clip_ratio/high_mean": 0.0008304002994918847, "clip_ratio/low_mean": 0.000668251524984953, "clip_ratio/low_min": 1.9740997231565416e-05, "clip_ratio/region_mean": 0.0014986518363002688, "epoch": 1.7836734693877552, "grad_norm": 0.12909036874771118, "learning_rate": 7.5e-07, "loss": 0.0016, "step": 180 }, { "clip_ratio/high_max": 0.001942301863891771, "clip_ratio/high_mean": 0.0008194026504497742, "clip_ratio/low_mean": 0.0006734293701811112, "clip_ratio/low_min": 4.9704212869983166e-05, "clip_ratio/region_mean": 0.0014928320451872423, "epoch": 1.7930029154518952, "grad_norm": 0.11999892443418503, "learning_rate": 7.5e-07, "loss": 0.0072, "step": 181 }, { "clip_ratio/high_max": 0.001964334776857868, "clip_ratio/high_mean": 0.000799381467004423, "clip_ratio/low_mean": 0.0005611672931991052, "clip_ratio/low_min": 2.7280662834527902e-05, "clip_ratio/region_mean": 0.0013605487474706024, "epoch": 1.802332361516035, "grad_norm": 0.12245429307222366, "learning_rate": 7.5e-07, "loss": -0.008, "step": 182 }, { "clip_ratio/high_max": 0.0018132077384507284, "clip_ratio/high_mean": 0.0008277606502815615, "clip_ratio/low_mean": 0.0006882901416247478, "clip_ratio/low_min": 4.0709320273890626e-05, "clip_ratio/region_mean": 0.0015160508046392351, "epoch": 1.811661807580175, "grad_norm": 0.13216952979564667, "learning_rate": 7.5e-07, "loss": -0.0032, "step": 183 }, { "clip_ratio/high_max": 0.002343828135053627, "clip_ratio/high_mean": 0.0008943943485064665, "clip_ratio/low_mean": 0.0007300854813365731, "clip_ratio/low_min": 0.0001079250741895521, "clip_ratio/region_mean": 0.0016244798171101138, "epoch": 1.820991253644315, "grad_norm": 0.11057619750499725, "learning_rate": 7.5e-07, "loss": 0.0093, "step": 184 }, { "clip_ratio/high_max": 0.0021799235364596825, "clip_ratio/high_mean": 0.0008645050784252817, "clip_ratio/low_mean": 0.0008480373981001321, "clip_ratio/low_min": 5.586313272942789e-05, "clip_ratio/region_mean": 0.0017125424892583396, "epoch": 1.8303206997084547, "grad_norm": 0.12997668981552124, "learning_rate": 7.5e-07, "loss": -0.0081, "step": 185 }, { "clip_ratio/high_max": 0.0019633299198176246, "clip_ratio/high_mean": 0.0009020439920277568, "clip_ratio/low_mean": 0.0008136718406603904, "clip_ratio/low_min": 7.51273037167266e-05, "clip_ratio/region_mean": 0.0017157158436020836, "epoch": 1.8396501457725947, "grad_norm": 0.12089799344539642, "learning_rate": 7.5e-07, "loss": -0.0326, "step": 186 }, { "clip_ratio/high_max": 0.0017270672433369327, "clip_ratio/high_mean": 0.0006630115713051055, "clip_ratio/low_mean": 0.0009420290389243746, "clip_ratio/low_min": 9.994933316193055e-05, "clip_ratio/region_mean": 0.0016050405756686814, "epoch": 1.8489795918367347, "grad_norm": 0.1254173219203949, "learning_rate": 7.5e-07, "loss": 0.0544, "step": 187 }, { "clip_ratio/high_max": 0.001967299649550114, "clip_ratio/high_mean": 0.0008779503314144677, "clip_ratio/low_mean": 0.0008513186585332733, "clip_ratio/low_min": 9.557758494338486e-05, "clip_ratio/region_mean": 0.0017292689735768363, "epoch": 1.8583090379008746, "grad_norm": 0.1279982626438141, "learning_rate": 7.5e-07, "loss": 0.0215, "step": 188 }, { "clip_ratio/high_max": 0.0022823183389846236, "clip_ratio/high_mean": 0.0010174388971790904, "clip_ratio/low_mean": 0.0009471376433793921, "clip_ratio/low_min": 7.655057561350986e-05, "clip_ratio/region_mean": 0.0019645765351015143, "epoch": 1.8676384839650146, "grad_norm": 0.13214661180973053, "learning_rate": 7.5e-07, "loss": -0.0125, "step": 189 }, { "clip_ratio/high_max": 0.002188999467762187, "clip_ratio/high_mean": 0.0009373639422847191, "clip_ratio/low_mean": 0.00083519718828029, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001772561139659956, "epoch": 1.8769679300291546, "grad_norm": 0.12578491866588593, "learning_rate": 7.5e-07, "loss": -0.0139, "step": 190 }, { "clip_ratio/high_max": 0.0023513930282206275, "clip_ratio/high_mean": 0.0008515197932865703, "clip_ratio/low_mean": 0.0008458050724584609, "clip_ratio/low_min": 0.00010057049257738981, "clip_ratio/region_mean": 0.0016973248639260419, "epoch": 1.8862973760932946, "grad_norm": 0.12703430652618408, "learning_rate": 7.5e-07, "loss": 0.0235, "step": 191 }, { "clip_ratio/high_max": 0.0022801828308729455, "clip_ratio/high_mean": 0.0009359572432003915, "clip_ratio/low_mean": 0.0009916927228914574, "clip_ratio/low_min": 4.1026651160791516e-05, "clip_ratio/region_mean": 0.0019276499588158913, "epoch": 1.8956268221574344, "grad_norm": 0.13357076048851013, "learning_rate": 7.5e-07, "loss": 0.0174, "step": 192 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0199497767857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4007.0, "completions/mean_length": 640.5494384765625, "completions/mean_terminated_length": 570.210693359375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 2.00932944606414, "grad_norm": 0.1314922422170639, "learning_rate": 7.5e-07, "loss": 0.0003, "num_tokens": 121944219.0, "reward": 0.586495578289032, "reward_std": 0.19470664858818054, "rewards/simpleverify_reward/mean": 0.5864955186843872, "rewards/simpleverify_reward/std": 0.4924788773059845, "step": 193 }, { "clip_ratio/high_max": 0.0018377406267973129, "clip_ratio/high_mean": 0.0006774483144909027, "clip_ratio/low_mean": 0.0006425548235711176, "clip_ratio/low_min": 5.3050852329761256e-05, "clip_ratio/region_mean": 0.0013200031135056634, "epoch": 2.01865889212828, "grad_norm": 0.12263994663953781, "learning_rate": 7.5e-07, "loss": 0.0343, "step": 194 }, { "clip_ratio/high_max": 0.001759062706696568, "clip_ratio/high_mean": 0.0007945804463815875, "clip_ratio/low_mean": 0.0006706701315124519, "clip_ratio/low_min": 3.578792438929668e-05, "clip_ratio/region_mean": 0.001465250566980103, "epoch": 2.02798833819242, "grad_norm": 0.12617705762386322, "learning_rate": 7.5e-07, "loss": 0.0304, "step": 195 }, { "clip_ratio/high_max": 0.0020256293573766015, "clip_ratio/high_mean": 0.000880103410963784, "clip_ratio/low_mean": 0.0006622363252972718, "clip_ratio/low_min": 1.2603347386175301e-05, "clip_ratio/region_mean": 0.0015423397089762148, "epoch": 2.03731778425656, "grad_norm": 0.12555097043514252, "learning_rate": 7.5e-07, "loss": -0.0504, "step": 196 }, { "clip_ratio/high_max": 0.0018143241031793877, "clip_ratio/high_mean": 0.0007330297121370677, "clip_ratio/low_mean": 0.00062940414318291, "clip_ratio/low_min": 7.931327854748815e-05, "clip_ratio/region_mean": 0.0013624338535009883, "epoch": 2.0466472303206995, "grad_norm": 0.11481919884681702, "learning_rate": 7.5e-07, "loss": -0.0197, "step": 197 }, { "clip_ratio/high_max": 0.0020153793884674087, "clip_ratio/high_mean": 0.0008345375226781471, "clip_ratio/low_mean": 0.0007404434927593684, "clip_ratio/low_min": 5.33741113031283e-05, "clip_ratio/region_mean": 0.0015749809754197486, "epoch": 2.0559766763848395, "grad_norm": 0.12324080616235733, "learning_rate": 7.5e-07, "loss": 0.0063, "step": 198 }, { "clip_ratio/high_max": 0.0020532530616037548, "clip_ratio/high_mean": 0.0007931781128718285, "clip_ratio/low_mean": 0.0007179542399171623, "clip_ratio/low_min": 5.849762146681314e-05, "clip_ratio/region_mean": 0.0015111323446035385, "epoch": 2.0653061224489795, "grad_norm": 0.13921619951725006, "learning_rate": 7.5e-07, "loss": 0.0002, "step": 199 }, { "clip_ratio/high_max": 0.0020630551953217946, "clip_ratio/high_mean": 0.0008039867443585536, "clip_ratio/low_mean": 0.0007892990979598835, "clip_ratio/low_min": 2.059986763924826e-05, "clip_ratio/region_mean": 0.001593285873241257, "epoch": 2.0746355685131195, "grad_norm": 0.12063732743263245, "learning_rate": 7.5e-07, "loss": 0.0267, "step": 200 }, { "clip_ratio/high_max": 0.0020537498203339055, "clip_ratio/high_mean": 0.0008478390300297178, "clip_ratio/low_mean": 0.0006805799002904678, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015284189430531114, "epoch": 2.0839650145772595, "grad_norm": 0.12411027401685715, "learning_rate": 7.5e-07, "loss": 0.0005, "step": 201 }, { "clip_ratio/high_max": 0.0023566358431708068, "clip_ratio/high_mean": 0.0009166151176032145, "clip_ratio/low_mean": 0.0007280093341250904, "clip_ratio/low_min": 1.4060742614674382e-05, "clip_ratio/region_mean": 0.0016446244007966015, "epoch": 2.0932944606413995, "grad_norm": 0.13142608106136322, "learning_rate": 7.5e-07, "loss": 0.0077, "step": 202 }, { "clip_ratio/high_max": 0.002268526302941609, "clip_ratio/high_mean": 0.0008572206916142022, "clip_ratio/low_mean": 0.0007643463932254235, "clip_ratio/low_min": 6.821085753472289e-05, "clip_ratio/region_mean": 0.0016215670329984277, "epoch": 2.1026239067055394, "grad_norm": 0.158853217959404, "learning_rate": 7.5e-07, "loss": 0.0, "step": 203 }, { "clip_ratio/high_max": 0.0018556572467787191, "clip_ratio/high_mean": 0.0007847202505217865, "clip_ratio/low_mean": 0.0009148639383056434, "clip_ratio/low_min": 7.421822920150589e-05, "clip_ratio/region_mean": 0.0016995841724565253, "epoch": 2.1119533527696794, "grad_norm": 0.13422055542469025, "learning_rate": 7.5e-07, "loss": 0.0274, "step": 204 }, { "clip_ratio/high_max": 0.0018616503875819035, "clip_ratio/high_mean": 0.0008128523331834003, "clip_ratio/low_mean": 0.0009095620334846899, "clip_ratio/low_min": 0.00013542441138270078, "clip_ratio/region_mean": 0.001722414352116175, "epoch": 2.1212827988338194, "grad_norm": 0.12593169510364532, "learning_rate": 7.5e-07, "loss": 0.0246, "step": 205 }, { "clip_ratio/high_max": 0.0020803686347790062, "clip_ratio/high_mean": 0.000825093866296811, "clip_ratio/low_mean": 0.000902860903806868, "clip_ratio/low_min": 5.065855020802701e-05, "clip_ratio/region_mean": 0.0017279547537327744, "epoch": 2.130612244897959, "grad_norm": 0.11731145530939102, "learning_rate": 7.5e-07, "loss": 0.0037, "step": 206 }, { "clip_ratio/high_max": 0.0025992376467911527, "clip_ratio/high_mean": 0.0010499677118787076, "clip_ratio/low_mean": 0.00096824662432482, "clip_ratio/low_min": 8.079164126684191e-05, "clip_ratio/region_mean": 0.002018214319832623, "epoch": 2.139941690962099, "grad_norm": 0.13808046281337738, "learning_rate": 7.5e-07, "loss": -0.0085, "step": 207 }, { "clip_ratio/high_max": 0.002114731149049476, "clip_ratio/high_mean": 0.0008776321956247557, "clip_ratio/low_mean": 0.0009146725242317189, "clip_ratio/low_min": 4.341908970673103e-05, "clip_ratio/region_mean": 0.001792304712580517, "epoch": 2.149271137026239, "grad_norm": 0.1236065998673439, "learning_rate": 7.5e-07, "loss": -0.0039, "step": 208 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019740513392857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4054.0, "completions/mean_length": 627.6524658203125, "completions/mean_terminated_length": 557.8067016601562, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 2.158600583090379, "grad_norm": 0.12620647251605988, "learning_rate": 7.5e-07, "loss": -0.0224, "num_tokens": 131177340.0, "reward": 0.5912388563156128, "reward_std": 0.18805521726608276, "rewards/simpleverify_reward/mean": 0.5912388563156128, "rewards/simpleverify_reward/std": 0.4916221499443054, "step": 209 }, { "clip_ratio/high_max": 0.002027037611696869, "clip_ratio/high_mean": 0.0007200882409961196, "clip_ratio/low_mean": 0.0006473174780694535, "clip_ratio/low_min": 5.003134265280096e-05, "clip_ratio/region_mean": 0.0013674057117896155, "epoch": 2.167930029154519, "grad_norm": 0.12368282675743103, "learning_rate": 7.5e-07, "loss": 0.0484, "step": 210 }, { "clip_ratio/high_max": 0.002013333680224605, "clip_ratio/high_mean": 0.0009153744267678121, "clip_ratio/low_mean": 0.0005333955004971358, "clip_ratio/low_min": 2.7662524189508986e-05, "clip_ratio/region_mean": 0.0014487699154415168, "epoch": 2.177259475218659, "grad_norm": 0.12037297338247299, "learning_rate": 7.5e-07, "loss": -0.0298, "step": 211 }, { "clip_ratio/high_max": 0.0018059688991343137, "clip_ratio/high_mean": 0.0007411850983771728, "clip_ratio/low_mean": 0.0005821628528792644, "clip_ratio/low_min": 4.085491855221335e-05, "clip_ratio/region_mean": 0.0013233479694463313, "epoch": 2.186588921282799, "grad_norm": 0.11723510921001434, "learning_rate": 7.5e-07, "loss": 0.038, "step": 212 }, { "clip_ratio/high_max": 0.0019425257996772416, "clip_ratio/high_mean": 0.0006827126708230935, "clip_ratio/low_mean": 0.0005920118765061488, "clip_ratio/low_min": 5.4432945034932345e-05, "clip_ratio/region_mean": 0.001274724545510253, "epoch": 2.195918367346939, "grad_norm": 0.11747456341981888, "learning_rate": 7.5e-07, "loss": 0.0414, "step": 213 }, { "clip_ratio/high_max": 0.0023103731218725443, "clip_ratio/high_mean": 0.001005162310320884, "clip_ratio/low_mean": 0.0007176672042987775, "clip_ratio/low_min": 5.723926551581826e-05, "clip_ratio/region_mean": 0.0017228295546374284, "epoch": 2.205247813411079, "grad_norm": 0.14162896573543549, "learning_rate": 7.5e-07, "loss": -0.0135, "step": 214 }, { "clip_ratio/high_max": 0.002260229808598524, "clip_ratio/high_mean": 0.0008547287088731537, "clip_ratio/low_mean": 0.0005839732866661507, "clip_ratio/low_min": 3.43689544024528e-05, "clip_ratio/region_mean": 0.001438702005543746, "epoch": 2.2145772594752184, "grad_norm": 0.12335532903671265, "learning_rate": 7.5e-07, "loss": -0.0249, "step": 215 }, { "clip_ratio/high_max": 0.0022079460904933512, "clip_ratio/high_mean": 0.0009355513684567995, "clip_ratio/low_mean": 0.0006252257890082547, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001560777163831517, "epoch": 2.2239067055393584, "grad_norm": 0.12777957320213318, "learning_rate": 7.5e-07, "loss": -0.0649, "step": 216 }, { "clip_ratio/high_max": 0.0018569779203971848, "clip_ratio/high_mean": 0.0007688300029258244, "clip_ratio/low_mean": 0.0008096062811091542, "clip_ratio/low_min": 2.8986440156586468e-05, "clip_ratio/region_mean": 0.001578436294948915, "epoch": 2.2332361516034984, "grad_norm": 0.15015757083892822, "learning_rate": 7.5e-07, "loss": 0.0414, "step": 217 }, { "clip_ratio/high_max": 0.002208548266935395, "clip_ratio/high_mean": 0.0008868657187122153, "clip_ratio/low_mean": 0.0008234906454163138, "clip_ratio/low_min": 0.00019200240058125928, "clip_ratio/region_mean": 0.0017103563877753913, "epoch": 2.2425655976676384, "grad_norm": 0.12338457256555557, "learning_rate": 7.5e-07, "loss": 0.0201, "step": 218 }, { "clip_ratio/high_max": 0.0019297143153380603, "clip_ratio/high_mean": 0.000783665385824861, "clip_ratio/low_mean": 0.0007262743993123877, "clip_ratio/low_min": 7.260212260007393e-05, "clip_ratio/region_mean": 0.0015099397787707858, "epoch": 2.2518950437317784, "grad_norm": 0.12405642867088318, "learning_rate": 7.5e-07, "loss": 0.0147, "step": 219 }, { "clip_ratio/high_max": 0.0023150925335357897, "clip_ratio/high_mean": 0.0008237237580033252, "clip_ratio/low_mean": 0.0007066646885505179, "clip_ratio/low_min": 1.950686601048801e-05, "clip_ratio/region_mean": 0.0015303884392778855, "epoch": 2.2612244897959184, "grad_norm": 0.13472528755664825, "learning_rate": 7.5e-07, "loss": 0.0166, "step": 220 }, { "clip_ratio/high_max": 0.00210048069129698, "clip_ratio/high_mean": 0.0008517735186615027, "clip_ratio/low_mean": 0.0009905362421704922, "clip_ratio/low_min": 8.160719426086871e-05, "clip_ratio/region_mean": 0.0018423098008497618, "epoch": 2.2705539358600584, "grad_norm": 0.13670377433300018, "learning_rate": 7.5e-07, "loss": 0.0229, "step": 221 }, { "clip_ratio/high_max": 0.0019279459083918482, "clip_ratio/high_mean": 0.0008518946524418425, "clip_ratio/low_mean": 0.0008508510236424627, "clip_ratio/low_min": 1.3516436411009636e-05, "clip_ratio/region_mean": 0.0017027456233336125, "epoch": 2.2798833819241984, "grad_norm": 0.11787401139736176, "learning_rate": 7.5e-07, "loss": 0.0171, "step": 222 }, { "clip_ratio/high_max": 0.0022810484988440294, "clip_ratio/high_mean": 0.0008559142515878193, "clip_ratio/low_mean": 0.0008369001334358472, "clip_ratio/low_min": 3.818482491624309e-05, "clip_ratio/region_mean": 0.0016928143886616454, "epoch": 2.2892128279883384, "grad_norm": 0.12318341434001923, "learning_rate": 7.5e-07, "loss": -0.009, "step": 223 }, { "clip_ratio/high_max": 0.0023342297245108057, "clip_ratio/high_mean": 0.0008961887060650042, "clip_ratio/low_mean": 0.0008466916779070743, "clip_ratio/low_min": 0.0001390950119457557, "clip_ratio/region_mean": 0.0017428803912480362, "epoch": 2.298542274052478, "grad_norm": 0.12740980088710785, "learning_rate": 7.5e-07, "loss": -0.0132, "step": 224 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.019810267857142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4088.0, "completions/mean_length": 631.1838989257812, "completions/mean_terminated_length": 561.15771484375, "completions/min_length": 4.0, "completions/min_terminated_length": 4.0, "epoch": 2.307871720116618, "grad_norm": 0.12912075221538544, "learning_rate": 7.5e-07, "loss": -0.0122, "num_tokens": 140487328.0, "reward": 0.5877511501312256, "reward_std": 0.19044680893421173, "rewards/simpleverify_reward/mean": 0.5877510905265808, "rewards/simpleverify_reward/std": 0.49225670099258423, "step": 225 }, { "clip_ratio/high_max": 0.00204030627355678, "clip_ratio/high_mean": 0.0008004286410141503, "clip_ratio/low_mean": 0.0006168998224893585, "clip_ratio/low_min": 3.173539244016865e-05, "clip_ratio/region_mean": 0.001417328450770583, "epoch": 2.317201166180758, "grad_norm": 0.1162475124001503, "learning_rate": 7.5e-07, "loss": 0.036, "step": 226 }, { "clip_ratio/high_max": 0.0019864681889885105, "clip_ratio/high_mean": 0.0008274942301795818, "clip_ratio/low_mean": 0.0006551736551045906, "clip_ratio/low_min": 5.539618268812774e-05, "clip_ratio/region_mean": 0.001482667874370236, "epoch": 2.326530612244898, "grad_norm": 0.12946182489395142, "learning_rate": 7.5e-07, "loss": 0.0093, "step": 227 }, { "clip_ratio/high_max": 0.0019917083554901183, "clip_ratio/high_mean": 0.0007815912122168811, "clip_ratio/low_mean": 0.000632036068054731, "clip_ratio/low_min": 2.2839733901491854e-05, "clip_ratio/region_mean": 0.001413627280271612, "epoch": 2.335860058309038, "grad_norm": 0.11481428891420364, "learning_rate": 7.5e-07, "loss": 0.0304, "step": 228 }, { "clip_ratio/high_max": 0.001898596492537763, "clip_ratio/high_mean": 0.0007737522028037347, "clip_ratio/low_mean": 0.000681710364005994, "clip_ratio/low_min": 6.412075981643284e-05, "clip_ratio/region_mean": 0.0014554625740856864, "epoch": 2.345189504373178, "grad_norm": 0.1250775158405304, "learning_rate": 7.5e-07, "loss": 0.0228, "step": 229 }, { "clip_ratio/high_max": 0.0019414332637097687, "clip_ratio/high_mean": 0.000835218716019881, "clip_ratio/low_mean": 0.0007121573107724544, "clip_ratio/low_min": 4.62144080302096e-05, "clip_ratio/region_mean": 0.0015473759995074943, "epoch": 2.354518950437318, "grad_norm": 0.12868787348270416, "learning_rate": 7.5e-07, "loss": -0.0017, "step": 230 }, { "clip_ratio/high_max": 0.0023048228686093353, "clip_ratio/high_mean": 0.0010081767923111329, "clip_ratio/low_mean": 0.0005476981123138103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015558749146293849, "epoch": 2.363848396501458, "grad_norm": 0.14041060209274292, "learning_rate": 7.5e-07, "loss": -0.0437, "step": 231 }, { "clip_ratio/high_max": 0.001937418470333796, "clip_ratio/high_mean": 0.0007440968583978247, "clip_ratio/low_mean": 0.0006152016649139114, "clip_ratio/low_min": 5.0049809033225756e-05, "clip_ratio/region_mean": 0.0013592985160357784, "epoch": 2.373177842565598, "grad_norm": 0.12489339709281921, "learning_rate": 7.5e-07, "loss": -0.0192, "step": 232 }, { "clip_ratio/high_max": 0.0019469893886707723, "clip_ratio/high_mean": 0.0008214515328290872, "clip_ratio/low_mean": 0.000723297445802018, "clip_ratio/low_min": 5.496760786627419e-05, "clip_ratio/region_mean": 0.0015447489713551477, "epoch": 2.3825072886297374, "grad_norm": 0.12206066399812698, "learning_rate": 7.5e-07, "loss": -0.0139, "step": 233 }, { "clip_ratio/high_max": 0.0019719899300980614, "clip_ratio/high_mean": 0.0008424602037848672, "clip_ratio/low_mean": 0.000588444016102585, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014309042344393674, "epoch": 2.3918367346938774, "grad_norm": 0.11530240625143051, "learning_rate": 7.5e-07, "loss": -0.017, "step": 234 }, { "clip_ratio/high_max": 0.002099925637594424, "clip_ratio/high_mean": 0.0008500240983266849, "clip_ratio/low_mean": 0.0008377607009606436, "clip_ratio/low_min": 3.981080044468399e-05, "clip_ratio/region_mean": 0.0016877847447176464, "epoch": 2.4011661807580174, "grad_norm": 0.12984396517276764, "learning_rate": 7.5e-07, "loss": -0.0012, "step": 235 }, { "clip_ratio/high_max": 0.0018985993083333597, "clip_ratio/high_mean": 0.0008485088856104994, "clip_ratio/low_mean": 0.0007982556726346957, "clip_ratio/low_min": 9.00360300875036e-05, "clip_ratio/region_mean": 0.0016467645255033858, "epoch": 2.4104956268221573, "grad_norm": 0.11835947632789612, "learning_rate": 7.5e-07, "loss": 0.0334, "step": 236 }, { "clip_ratio/high_max": 0.001860998427218874, "clip_ratio/high_mean": 0.0007846977223380236, "clip_ratio/low_mean": 0.0009402604118804447, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001724958110571606, "epoch": 2.4198250728862973, "grad_norm": 0.1354372203350067, "learning_rate": 7.5e-07, "loss": 0.0062, "step": 237 }, { "clip_ratio/high_max": 0.002067051660560537, "clip_ratio/high_mean": 0.0007930176634545205, "clip_ratio/low_mean": 0.0008739486875128932, "clip_ratio/low_min": 4.216551133140456e-05, "clip_ratio/region_mean": 0.001666966338234488, "epoch": 2.4291545189504373, "grad_norm": 0.12286464869976044, "learning_rate": 7.5e-07, "loss": 0.0076, "step": 238 }, { "clip_ratio/high_max": 0.00253047893056646, "clip_ratio/high_mean": 0.001089610654162243, "clip_ratio/low_mean": 0.000781998673119233, "clip_ratio/low_min": 3.185652258252958e-05, "clip_ratio/region_mean": 0.0018716092890826985, "epoch": 2.4384839650145773, "grad_norm": 0.13784615695476532, "learning_rate": 7.5e-07, "loss": -0.0262, "step": 239 }, { "clip_ratio/high_max": 0.0020198296260787174, "clip_ratio/high_mean": 0.0008751674249651842, "clip_ratio/low_mean": 0.0008514514793205308, "clip_ratio/low_min": 4.08763880841434e-05, "clip_ratio/region_mean": 0.0017266189024667256, "epoch": 2.4478134110787173, "grad_norm": 0.11742354184389114, "learning_rate": 7.5e-07, "loss": 0.0127, "step": 240 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.02294921875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4092.0, "completions/mean_length": 655.6318359375, "completions/mean_terminated_length": 574.8236083984375, "completions/min_length": 37.0, "completions/min_terminated_length": 37.0, "epoch": 2.4571428571428573, "grad_norm": 0.12906600534915924, "learning_rate": 7.5e-07, "loss": 0.0101, "num_tokens": 149990722.0, "reward": 0.5841239094734192, "reward_std": 0.18676458299160004, "rewards/simpleverify_reward/mean": 0.5841239094734192, "rewards/simpleverify_reward/std": 0.49288955330848694, "step": 241 }, { "clip_ratio/high_max": 0.0018700803229876328, "clip_ratio/high_mean": 0.0007012561027295305, "clip_ratio/low_mean": 0.0005303634834490367, "clip_ratio/low_min": 4.870144766755402e-05, "clip_ratio/region_mean": 0.0012316195861785673, "epoch": 2.466472303206997, "grad_norm": 0.10772235691547394, "learning_rate": 7.5e-07, "loss": 0.0112, "step": 242 }, { "clip_ratio/high_max": 0.0019651847760542296, "clip_ratio/high_mean": 0.0007940348659758456, "clip_ratio/low_mean": 0.0005421155046860804, "clip_ratio/low_min": 2.70829041255638e-05, "clip_ratio/region_mean": 0.0013361503661144525, "epoch": 2.4758017492711373, "grad_norm": 0.1241592988371849, "learning_rate": 7.5e-07, "loss": -0.0179, "step": 243 }, { "clip_ratio/high_max": 0.0017666722706053406, "clip_ratio/high_mean": 0.0007759787567920284, "clip_ratio/low_mean": 0.0005820130609208718, "clip_ratio/low_min": 5.906410842726473e-05, "clip_ratio/region_mean": 0.0013579918231698684, "epoch": 2.485131195335277, "grad_norm": 0.11345313489437103, "learning_rate": 7.5e-07, "loss": -0.0028, "step": 244 }, { "clip_ratio/high_max": 0.0020741901244036853, "clip_ratio/high_mean": 0.0008839090623951051, "clip_ratio/low_mean": 0.0006501380794361467, "clip_ratio/low_min": 1.4856192137813196e-05, "clip_ratio/region_mean": 0.0015340471436502412, "epoch": 2.494460641399417, "grad_norm": 0.1300712674856186, "learning_rate": 7.5e-07, "loss": -0.047, "step": 245 }, { "clip_ratio/high_max": 0.00200485646200832, "clip_ratio/high_mean": 0.0007994504358066479, "clip_ratio/low_mean": 0.0006948708105483092, "clip_ratio/low_min": 6.854242383269593e-05, "clip_ratio/region_mean": 0.0014943212663638406, "epoch": 2.503790087463557, "grad_norm": 0.1276429146528244, "learning_rate": 7.5e-07, "loss": -0.0118, "step": 246 }, { "clip_ratio/high_max": 0.0018552813271526247, "clip_ratio/high_mean": 0.0008656185618747259, "clip_ratio/low_mean": 0.0007258625237227534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015914810937829316, "epoch": 2.513119533527697, "grad_norm": 0.12743426859378815, "learning_rate": 7.5e-07, "loss": 0.0266, "step": 247 }, { "clip_ratio/high_max": 0.0017259652304346673, "clip_ratio/high_mean": 0.0006558008026331663, "clip_ratio/low_mean": 0.0006853762806713348, "clip_ratio/low_min": 1.7198679415741935e-05, "clip_ratio/region_mean": 0.0013411770923994482, "epoch": 2.522448979591837, "grad_norm": 0.1102890819311142, "learning_rate": 7.5e-07, "loss": -0.0001, "step": 248 }, { "clip_ratio/high_max": 0.0023183942030300386, "clip_ratio/high_mean": 0.0008494115609209985, "clip_ratio/low_mean": 0.0008840785540087381, "clip_ratio/low_min": 9.29229827306699e-06, "clip_ratio/region_mean": 0.0017334901494905353, "epoch": 2.5317784256559768, "grad_norm": 0.11256512254476547, "learning_rate": 7.5e-07, "loss": 0.0072, "step": 249 }, { "clip_ratio/high_max": 0.0023374347874778323, "clip_ratio/high_mean": 0.0008215693032980198, "clip_ratio/low_mean": 0.0007814513010089286, "clip_ratio/low_min": 1.1208751857338939e-05, "clip_ratio/region_mean": 0.0016030206097639166, "epoch": 2.5411078717201168, "grad_norm": 0.12500019371509552, "learning_rate": 7.5e-07, "loss": -0.0212, "step": 250 }, { "clip_ratio/high_max": 0.0018470958493708167, "clip_ratio/high_mean": 0.0007921609048935352, "clip_ratio/low_mean": 0.0007391125891444972, "clip_ratio/low_min": 6.17041487203096e-05, "clip_ratio/region_mean": 0.0015312734976760112, "epoch": 2.5504373177842563, "grad_norm": 0.13777244091033936, "learning_rate": 7.5e-07, "loss": -0.0138, "step": 251 }, { "clip_ratio/high_max": 0.0022576493720407598, "clip_ratio/high_mean": 0.0008440185210929485, "clip_ratio/low_mean": 0.0008189945710910251, "clip_ratio/low_min": 7.145357994886581e-05, "clip_ratio/region_mean": 0.0016630130885459948, "epoch": 2.5597667638483967, "grad_norm": 0.12635305523872375, "learning_rate": 7.5e-07, "loss": 0.0095, "step": 252 }, { "clip_ratio/high_max": 0.0020280490134609863, "clip_ratio/high_mean": 0.0007946987716422882, "clip_ratio/low_mean": 0.0008746402336328174, "clip_ratio/low_min": 3.5963329537480604e-05, "clip_ratio/region_mean": 0.001669339009822579, "epoch": 2.5690962099125363, "grad_norm": 0.16254574060440063, "learning_rate": 7.5e-07, "loss": -0.008, "step": 253 }, { "clip_ratio/high_max": 0.001968639193364652, "clip_ratio/high_mean": 0.000809704677976697, "clip_ratio/low_mean": 0.0009676397603470832, "clip_ratio/low_min": 6.877596479171189e-05, "clip_ratio/region_mean": 0.0017773444051272236, "epoch": 2.5784256559766763, "grad_norm": 0.1361626237630844, "learning_rate": 7.5e-07, "loss": 0.0649, "step": 254 }, { "clip_ratio/high_max": 0.0016573050197621342, "clip_ratio/high_mean": 0.0006904832480358891, "clip_ratio/low_mean": 0.0008098671805782942, "clip_ratio/low_min": 3.305608879600186e-05, "clip_ratio/region_mean": 0.0015003504231572151, "epoch": 2.5877551020408163, "grad_norm": 0.11784699559211731, "learning_rate": 7.5e-07, "loss": 0.0056, "step": 255 }, { "clip_ratio/high_max": 0.002011499022046337, "clip_ratio/high_mean": 0.0007121706548787188, "clip_ratio/low_mean": 0.0007783120454405434, "clip_ratio/low_min": 7.0760471317044e-05, "clip_ratio/region_mean": 0.0014904827366990503, "epoch": 2.5970845481049563, "grad_norm": 0.12365245819091797, "learning_rate": 7.5e-07, "loss": 0.0253, "step": 256 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0244140625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4077.0, "completions/mean_length": 652.3703002929688, "completions/mean_terminated_length": 566.193359375, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 2.6064139941690962, "grad_norm": 0.13199710845947266, "learning_rate": 7.5e-07, "loss": -0.0109, "num_tokens": 159336446.0, "reward": 0.5847517251968384, "reward_std": 0.18934302031993866, "rewards/simpleverify_reward/mean": 0.5847516655921936, "rewards/simpleverify_reward/std": 0.49278199672698975, "step": 257 }, { "clip_ratio/high_max": 0.0015979199888533913, "clip_ratio/high_mean": 0.0006893960307934321, "clip_ratio/low_mean": 0.0006355441564664943, "clip_ratio/low_min": 3.9589704101672396e-05, "clip_ratio/region_mean": 0.0013249401599750854, "epoch": 2.6157434402332362, "grad_norm": 0.12878069281578064, "learning_rate": 7.5e-07, "loss": 0.031, "step": 258 }, { "clip_ratio/high_max": 0.0017400485248799669, "clip_ratio/high_mean": 0.0007165526917560783, "clip_ratio/low_mean": 0.0005477070444612764, "clip_ratio/low_min": 2.0836805560975336e-05, "clip_ratio/region_mean": 0.0012642597321246285, "epoch": 2.6250728862973762, "grad_norm": 0.13520103693008423, "learning_rate": 7.5e-07, "loss": 0.0422, "step": 259 }, { "clip_ratio/high_max": 0.002081702052237233, "clip_ratio/high_mean": 0.0008166720617737155, "clip_ratio/low_mean": 0.0005844982060807524, "clip_ratio/low_min": 1.5838824765523896e-05, "clip_ratio/region_mean": 0.0014011702805873938, "epoch": 2.6344023323615158, "grad_norm": 0.11662790179252625, "learning_rate": 7.5e-07, "loss": -0.0248, "step": 260 }, { "clip_ratio/high_max": 0.0017787793913157657, "clip_ratio/high_mean": 0.0008692174524185248, "clip_ratio/low_mean": 0.0006727370027874713, "clip_ratio/low_min": 5.201824023970403e-05, "clip_ratio/region_mean": 0.0015419544652104378, "epoch": 2.643731778425656, "grad_norm": 0.12313420325517654, "learning_rate": 7.5e-07, "loss": 0.0114, "step": 261 }, { "clip_ratio/high_max": 0.001982646674150601, "clip_ratio/high_mean": 0.000759659698815085, "clip_ratio/low_mean": 0.0006016446513967821, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013613043192890473, "epoch": 2.6530612244897958, "grad_norm": 0.11890937387943268, "learning_rate": 7.5e-07, "loss": 0.0123, "step": 262 }, { "clip_ratio/high_max": 0.0017580423191247974, "clip_ratio/high_mean": 0.0007060313000692986, "clip_ratio/low_mean": 0.0006277019438130083, "clip_ratio/low_min": 7.805014956829837e-05, "clip_ratio/region_mean": 0.0013337332529772539, "epoch": 2.6623906705539357, "grad_norm": 0.13126344978809357, "learning_rate": 7.5e-07, "loss": 0.0087, "step": 263 }, { "clip_ratio/high_max": 0.0017901440187415574, "clip_ratio/high_mean": 0.0007803551925462671, "clip_ratio/low_mean": 0.0005376498183977674, "clip_ratio/low_min": 3.093293707934208e-05, "clip_ratio/region_mean": 0.0013180050191294868, "epoch": 2.6717201166180757, "grad_norm": 0.12480075657367706, "learning_rate": 7.5e-07, "loss": -0.0298, "step": 264 }, { "clip_ratio/high_max": 0.0016590770792390686, "clip_ratio/high_mean": 0.0006736046198057011, "clip_ratio/low_mean": 0.0007793163022142835, "clip_ratio/low_min": 1.5420675481436774e-05, "clip_ratio/region_mean": 0.0014529209074680693, "epoch": 2.6810495626822157, "grad_norm": 0.11289356648921967, "learning_rate": 7.5e-07, "loss": -0.011, "step": 265 }, { "clip_ratio/high_max": 0.0017779153895389754, "clip_ratio/high_mean": 0.000692783234626404, "clip_ratio/low_mean": 0.0007890772103564814, "clip_ratio/low_min": 2.966854299302213e-05, "clip_ratio/region_mean": 0.0014818604468018748, "epoch": 2.6903790087463557, "grad_norm": 0.13482584059238434, "learning_rate": 7.5e-07, "loss": 0.0252, "step": 266 }, { "clip_ratio/high_max": 0.001837731819250621, "clip_ratio/high_mean": 0.0008084688142844243, "clip_ratio/low_mean": 0.0007100652046574396, "clip_ratio/low_min": 6.581585512321908e-05, "clip_ratio/region_mean": 0.0015185339871095493, "epoch": 2.6997084548104957, "grad_norm": 0.1401849240064621, "learning_rate": 7.5e-07, "loss": -0.0295, "step": 267 }, { "clip_ratio/high_max": 0.0019048771791858599, "clip_ratio/high_mean": 0.0008572812294005416, "clip_ratio/low_mean": 0.0007506476504204329, "clip_ratio/low_min": 4.635653840523446e-05, "clip_ratio/region_mean": 0.0016079289198387414, "epoch": 2.7090379008746357, "grad_norm": 0.13046927750110626, "learning_rate": 7.5e-07, "loss": -0.0469, "step": 268 }, { "clip_ratio/high_max": 0.002198640700953547, "clip_ratio/high_mean": 0.0008473386769765057, "clip_ratio/low_mean": 0.0007656694906472694, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016130081712617539, "epoch": 2.7183673469387752, "grad_norm": 0.1320391446352005, "learning_rate": 7.5e-07, "loss": -0.0283, "step": 269 }, { "clip_ratio/high_max": 0.0019619791783043183, "clip_ratio/high_mean": 0.0008443780861853156, "clip_ratio/low_mean": 0.0008579217887927371, "clip_ratio/low_min": 9.22313665796537e-05, "clip_ratio/region_mean": 0.0017022999163600616, "epoch": 2.7276967930029157, "grad_norm": 0.11247613281011581, "learning_rate": 7.5e-07, "loss": -0.0002, "step": 270 }, { "clip_ratio/high_max": 0.0020599756426236127, "clip_ratio/high_mean": 0.0007879411023168359, "clip_ratio/low_mean": 0.000829225308734749, "clip_ratio/low_min": 2.9367149181780405e-05, "clip_ratio/region_mean": 0.0016171664537978359, "epoch": 2.7370262390670552, "grad_norm": 0.12205163389444351, "learning_rate": 7.5e-07, "loss": 0.0195, "step": 271 }, { "clip_ratio/high_max": 0.0021855262530152686, "clip_ratio/high_mean": 0.0008851976745063439, "clip_ratio/low_mean": 0.000864945410285145, "clip_ratio/low_min": 2.7462253456178587e-05, "clip_ratio/region_mean": 0.001750143066601595, "epoch": 2.746355685131195, "grad_norm": 0.1316508948802948, "learning_rate": 7.5e-07, "loss": -0.0005, "step": 272 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0230189732142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 644.58251953125, "completions/mean_terminated_length": 563.2625732421875, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 2.755685131195335, "grad_norm": 0.13628947734832764, "learning_rate": 7.5e-07, "loss": -0.026, "num_tokens": 168622901.0, "reward": 0.5793806314468384, "reward_std": 0.19097308814525604, "rewards/simpleverify_reward/mean": 0.5793805718421936, "rewards/simpleverify_reward/std": 0.49367570877075195, "step": 273 }, { "clip_ratio/high_max": 0.0019341516963322647, "clip_ratio/high_mean": 0.0008456612722511636, "clip_ratio/low_mean": 0.000575943206968077, "clip_ratio/low_min": 1.1359506061126012e-05, "clip_ratio/region_mean": 0.001421604505594587, "epoch": 2.765014577259475, "grad_norm": 0.11626263707876205, "learning_rate": 7.5e-07, "loss": -0.0031, "step": 274 }, { "clip_ratio/high_max": 0.002021186468482483, "clip_ratio/high_mean": 0.0008038832138481666, "clip_ratio/low_mean": 0.0006325225140244584, "clip_ratio/low_min": 0.00011743427239707671, "clip_ratio/region_mean": 0.0014364057569764555, "epoch": 2.774344023323615, "grad_norm": 0.1345091462135315, "learning_rate": 7.5e-07, "loss": 0.0208, "step": 275 }, { "clip_ratio/high_max": 0.0022526313550770283, "clip_ratio/high_mean": 0.000835821370856138, "clip_ratio/low_mean": 0.0005779831626568921, "clip_ratio/low_min": 4.510545295488555e-05, "clip_ratio/region_mean": 0.0014138045553409029, "epoch": 2.783673469387755, "grad_norm": 0.13081569969654083, "learning_rate": 7.5e-07, "loss": -0.0409, "step": 276 }, { "clip_ratio/high_max": 0.0017856469894468319, "clip_ratio/high_mean": 0.0006427245643862989, "clip_ratio/low_mean": 0.0006723198530380614, "clip_ratio/low_min": 4.037286817037966e-05, "clip_ratio/region_mean": 0.0013150443810445722, "epoch": 2.793002915451895, "grad_norm": 0.12474372982978821, "learning_rate": 7.5e-07, "loss": 0.0561, "step": 277 }, { "clip_ratio/high_max": 0.0018330255006731022, "clip_ratio/high_mean": 0.000819468905319809, "clip_ratio/low_mean": 0.0006607026716665132, "clip_ratio/low_min": 3.979480607085861e-05, "clip_ratio/region_mean": 0.0014801715587964281, "epoch": 2.8023323615160347, "grad_norm": 0.1359148919582367, "learning_rate": 7.5e-07, "loss": -0.0029, "step": 278 }, { "clip_ratio/high_max": 0.00190937721345108, "clip_ratio/high_mean": 0.0007756015093036694, "clip_ratio/low_mean": 0.0007273722185345832, "clip_ratio/low_min": 6.159077202028129e-05, "clip_ratio/region_mean": 0.0015029737187433057, "epoch": 2.811661807580175, "grad_norm": 0.13172301650047302, "learning_rate": 7.5e-07, "loss": 0.0203, "step": 279 }, { "clip_ratio/high_max": 0.0020771822819369845, "clip_ratio/high_mean": 0.0008497573217027821, "clip_ratio/low_mean": 0.0006283693674049573, "clip_ratio/low_min": 2.583178320492152e-05, "clip_ratio/region_mean": 0.00147812668728875, "epoch": 2.8209912536443147, "grad_norm": 0.12067868560552597, "learning_rate": 7.5e-07, "loss": -0.0326, "step": 280 }, { "clip_ratio/high_max": 0.0018148283525079023, "clip_ratio/high_mean": 0.0008335116981470492, "clip_ratio/low_mean": 0.0007083274467731826, "clip_ratio/low_min": 4.82331288367277e-05, "clip_ratio/region_mean": 0.0015418391667481046, "epoch": 2.8303206997084547, "grad_norm": 0.12956741452217102, "learning_rate": 7.5e-07, "loss": -0.0176, "step": 281 }, { "clip_ratio/high_max": 0.0019307585098431446, "clip_ratio/high_mean": 0.0008431958194705658, "clip_ratio/low_mean": 0.0006295154962572269, "clip_ratio/low_min": 4.881380846200045e-05, "clip_ratio/region_mean": 0.0014727113011758775, "epoch": 2.8396501457725947, "grad_norm": 0.13321539759635925, "learning_rate": 7.5e-07, "loss": -0.0045, "step": 282 }, { "clip_ratio/high_max": 0.00195836866623722, "clip_ratio/high_mean": 0.0008767385588726029, "clip_ratio/low_mean": 0.0008214416793634882, "clip_ratio/low_min": 9.274377498513786e-05, "clip_ratio/region_mean": 0.0016981802400550805, "epoch": 2.8489795918367347, "grad_norm": 0.1264815479516983, "learning_rate": 7.5e-07, "loss": 0.006, "step": 283 }, { "clip_ratio/high_max": 0.0020933619744027965, "clip_ratio/high_mean": 0.0008645716825412819, "clip_ratio/low_mean": 0.0007820334976713639, "clip_ratio/low_min": 5.6034305089269765e-05, "clip_ratio/region_mean": 0.0016466051602037624, "epoch": 2.8583090379008746, "grad_norm": 0.12158481031656265, "learning_rate": 7.5e-07, "loss": 0.0238, "step": 284 }, { "clip_ratio/high_max": 0.0018378786589892115, "clip_ratio/high_mean": 0.0007085591150826076, "clip_ratio/low_mean": 0.0007037998802843504, "clip_ratio/low_min": 4.5190704440756235e-05, "clip_ratio/region_mean": 0.0014123590008239262, "epoch": 2.8676384839650146, "grad_norm": 0.12888884544372559, "learning_rate": 7.5e-07, "loss": 0.027, "step": 285 }, { "clip_ratio/high_max": 0.002245790012239013, "clip_ratio/high_mean": 0.0008526397759851534, "clip_ratio/low_mean": 0.0008686717137607047, "clip_ratio/low_min": 6.359151757351356e-05, "clip_ratio/region_mean": 0.001721311520668678, "epoch": 2.8769679300291546, "grad_norm": 0.12541624903678894, "learning_rate": 7.5e-07, "loss": -0.0046, "step": 286 }, { "clip_ratio/high_max": 0.0024638376926304772, "clip_ratio/high_mean": 0.0009187097330141114, "clip_ratio/low_mean": 0.0009083184231712949, "clip_ratio/low_min": 0.000114466462036944, "clip_ratio/region_mean": 0.0018270281289005652, "epoch": 2.8862973760932946, "grad_norm": 0.12161947786808014, "learning_rate": 7.5e-07, "loss": -0.0049, "step": 287 }, { "clip_ratio/high_max": 0.001909046979562845, "clip_ratio/high_mean": 0.0009033155638462631, "clip_ratio/low_mean": 0.0006495921570603969, "clip_ratio/low_min": 4.0836326661519706e-05, "clip_ratio/region_mean": 0.0015529077463725116, "epoch": 2.8956268221574346, "grad_norm": 0.1635885238647461, "learning_rate": 7.5e-07, "loss": 0.0031, "step": 288 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0224609375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4067.0, "completions/mean_length": 646.952880859375, "completions/mean_terminated_length": 567.7040405273438, "completions/min_length": 3.0, "completions/min_terminated_length": 3.0, "epoch": 3.00932944606414, "grad_norm": 0.12722325325012207, "learning_rate": 7.5e-07, "loss": 0.0052, "num_tokens": 178012793.0, "reward": 0.590401828289032, "reward_std": 0.1870073676109314, "rewards/simpleverify_reward/mean": 0.5904017686843872, "rewards/simpleverify_reward/std": 0.4917767643928528, "step": 289 }, { "clip_ratio/high_max": 0.001951206497324165, "clip_ratio/high_mean": 0.0007050424828776158, "clip_ratio/low_mean": 0.0006436743369704345, "clip_ratio/low_min": 3.995343286078423e-05, "clip_ratio/region_mean": 0.0013487168471328914, "epoch": 3.01865889212828, "grad_norm": 0.13551762700080872, "learning_rate": 7.5e-07, "loss": 0.026, "step": 290 }, { "clip_ratio/high_max": 0.001950903060787823, "clip_ratio/high_mean": 0.0008319462540384848, "clip_ratio/low_mean": 0.0005498614655152778, "clip_ratio/low_min": 3.376257973286556e-05, "clip_ratio/region_mean": 0.0013818077168252785, "epoch": 3.02798833819242, "grad_norm": 0.13324132561683655, "learning_rate": 7.5e-07, "loss": -0.0084, "step": 291 }, { "clip_ratio/high_max": 0.0017656021300354041, "clip_ratio/high_mean": 0.0007336056078202091, "clip_ratio/low_mean": 0.0005722040905311587, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013058097138127778, "epoch": 3.03731778425656, "grad_norm": 0.1359037458896637, "learning_rate": 7.5e-07, "loss": 0.0039, "step": 292 }, { "clip_ratio/high_max": 0.0017463765652792063, "clip_ratio/high_mean": 0.0007111731338227401, "clip_ratio/low_mean": 0.000681492208968848, "clip_ratio/low_min": 5.0514977374405134e-05, "clip_ratio/region_mean": 0.0013926653482485563, "epoch": 3.0466472303206995, "grad_norm": 0.11980525404214859, "learning_rate": 7.5e-07, "loss": 0.0269, "step": 293 }, { "clip_ratio/high_max": 0.001985297833016375, "clip_ratio/high_mean": 0.0009011558704514755, "clip_ratio/low_mean": 0.0005806195913464762, "clip_ratio/low_min": 1.4029180420038756e-05, "clip_ratio/region_mean": 0.0014817754563409835, "epoch": 3.0559766763848395, "grad_norm": 0.1328798234462738, "learning_rate": 7.5e-07, "loss": -0.0351, "step": 294 }, { "clip_ratio/high_max": 0.0022269895271165296, "clip_ratio/high_mean": 0.0009952533200703328, "clip_ratio/low_mean": 0.0006332352249955875, "clip_ratio/low_min": 2.6063386030727997e-05, "clip_ratio/region_mean": 0.0016284886078210548, "epoch": 3.0653061224489795, "grad_norm": 0.1372148096561432, "learning_rate": 7.5e-07, "loss": -0.0514, "step": 295 }, { "clip_ratio/high_max": 0.0017482733019278385, "clip_ratio/high_mean": 0.0007777788068779046, "clip_ratio/low_mean": 0.0007904312515165657, "clip_ratio/low_min": 4.229343721817713e-05, "clip_ratio/region_mean": 0.0015682100638514385, "epoch": 3.0746355685131195, "grad_norm": 0.12782485783100128, "learning_rate": 7.5e-07, "loss": 0.0053, "step": 296 }, { "clip_ratio/high_max": 0.0019472360145300627, "clip_ratio/high_mean": 0.0007763386238366365, "clip_ratio/low_mean": 0.0006735129154549213, "clip_ratio/low_min": 3.3144433473353274e-05, "clip_ratio/region_mean": 0.0014498515301966108, "epoch": 3.0839650145772595, "grad_norm": 0.13459181785583496, "learning_rate": 7.5e-07, "loss": -0.0156, "step": 297 }, { "clip_ratio/high_max": 0.0021401020931079984, "clip_ratio/high_mean": 0.0009596563304512529, "clip_ratio/low_mean": 0.0007610824150106055, "clip_ratio/low_min": 3.54460298694903e-05, "clip_ratio/region_mean": 0.0017207387209055014, "epoch": 3.0932944606413995, "grad_norm": 0.1346547156572342, "learning_rate": 7.5e-07, "loss": -0.0141, "step": 298 }, { "clip_ratio/high_max": 0.002172485248593148, "clip_ratio/high_mean": 0.0008717773926036898, "clip_ratio/low_mean": 0.0006844468152849004, "clip_ratio/low_min": 2.814681465679314e-05, "clip_ratio/region_mean": 0.0015562242042506114, "epoch": 3.1026239067055394, "grad_norm": 0.12479761987924576, "learning_rate": 7.5e-07, "loss": -0.0044, "step": 299 }, { "clip_ratio/high_max": 0.0018234893432236277, "clip_ratio/high_mean": 0.0007648475420864997, "clip_ratio/low_mean": 0.0006873508646094706, "clip_ratio/low_min": 4.1863885599013884e-05, "clip_ratio/region_mean": 0.0014521984157909174, "epoch": 3.1119533527696794, "grad_norm": 0.11742979288101196, "learning_rate": 7.5e-07, "loss": 0.0086, "step": 300 }, { "clip_ratio/high_max": 0.0019662269551190548, "clip_ratio/high_mean": 0.0008250094742834335, "clip_ratio/low_mean": 0.0007584528066217899, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015834623009141069, "epoch": 3.1212827988338194, "grad_norm": 0.1232069581747055, "learning_rate": 7.5e-07, "loss": 0.0071, "step": 301 }, { "clip_ratio/high_max": 0.0017457308968005236, "clip_ratio/high_mean": 0.0007353570617851801, "clip_ratio/low_mean": 0.0008346073191205505, "clip_ratio/low_min": 8.210590567614418e-05, "clip_ratio/region_mean": 0.0015699643699917942, "epoch": 3.130612244897959, "grad_norm": 0.12370191514492035, "learning_rate": 7.5e-07, "loss": 0.0382, "step": 302 }, { "clip_ratio/high_max": 0.0020218033678247593, "clip_ratio/high_mean": 0.0008696518871147418, "clip_ratio/low_mean": 0.0006924857889316627, "clip_ratio/low_min": 5.036646598455263e-05, "clip_ratio/region_mean": 0.0015621376551280264, "epoch": 3.139941690962099, "grad_norm": 0.12471511214971542, "learning_rate": 7.5e-07, "loss": -0.0183, "step": 303 }, { "clip_ratio/high_max": 0.0020180773353786208, "clip_ratio/high_mean": 0.000784562117587484, "clip_ratio/low_mean": 0.0009683744192443555, "clip_ratio/low_min": 4.464511584956199e-05, "clip_ratio/region_mean": 0.0017529365359223448, "epoch": 3.149271137026239, "grad_norm": 0.13413412868976593, "learning_rate": 7.5e-07, "loss": 0.0206, "step": 304 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0262974330357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 682.8043212890625, "completions/mean_terminated_length": 590.621826171875, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 3.158600583090379, "grad_norm": 0.13254548609256744, "learning_rate": 7.5e-07, "loss": -0.0494, "num_tokens": 187723403.0, "reward": 0.5706613063812256, "reward_std": 0.18208761513233185, "rewards/simpleverify_reward/mean": 0.5706612467765808, "rewards/simpleverify_reward/std": 0.49499908089637756, "step": 305 }, { "clip_ratio/high_max": 0.0017008570357575081, "clip_ratio/high_mean": 0.0006379671467584558, "clip_ratio/low_mean": 0.00048440404680150095, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011223711881029885, "epoch": 3.167930029154519, "grad_norm": 0.1353403776884079, "learning_rate": 7.5e-07, "loss": 0.0276, "step": 306 }, { "clip_ratio/high_max": 0.0017978840696741827, "clip_ratio/high_mean": 0.0006838792105554603, "clip_ratio/low_mean": 0.0006854054608993465, "clip_ratio/low_min": 4.067059580847854e-05, "clip_ratio/region_mean": 0.001369284676911775, "epoch": 3.177259475218659, "grad_norm": 0.136386901140213, "learning_rate": 7.5e-07, "loss": 0.0171, "step": 307 }, { "clip_ratio/high_max": 0.0018319591508770827, "clip_ratio/high_mean": 0.0008172157031367533, "clip_ratio/low_mean": 0.0005621606369459187, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013793763282592408, "epoch": 3.186588921282799, "grad_norm": 0.13148236274719238, "learning_rate": 7.5e-07, "loss": -0.0146, "step": 308 }, { "clip_ratio/high_max": 0.0017103626814787276, "clip_ratio/high_mean": 0.0007266227785294177, "clip_ratio/low_mean": 0.0006459422247644397, "clip_ratio/low_min": 1.749230250425171e-05, "clip_ratio/region_mean": 0.0013725650060223415, "epoch": 3.195918367346939, "grad_norm": 0.12635250389575958, "learning_rate": 7.5e-07, "loss": 0.0008, "step": 309 }, { "clip_ratio/high_max": 0.0018291176602360792, "clip_ratio/high_mean": 0.0008067279268288985, "clip_ratio/low_mean": 0.0005987192271277308, "clip_ratio/low_min": 1.2913223145005759e-05, "clip_ratio/region_mean": 0.001405447143042693, "epoch": 3.205247813411079, "grad_norm": 0.1174868568778038, "learning_rate": 7.5e-07, "loss": -0.038, "step": 310 }, { "clip_ratio/high_max": 0.0020268373846192844, "clip_ratio/high_mean": 0.0007310061573662097, "clip_ratio/low_mean": 0.0006118009041529149, "clip_ratio/low_min": 1.3283740372571629e-05, "clip_ratio/region_mean": 0.00134280704514822, "epoch": 3.2145772594752184, "grad_norm": 0.12337223440408707, "learning_rate": 7.5e-07, "loss": 0.0116, "step": 311 }, { "clip_ratio/high_max": 0.0016413056509918533, "clip_ratio/high_mean": 0.0006978771589274402, "clip_ratio/low_mean": 0.0007535565964644775, "clip_ratio/low_min": 8.249705842899857e-05, "clip_ratio/region_mean": 0.00145143376357737, "epoch": 3.2239067055393584, "grad_norm": 0.1784147322177887, "learning_rate": 7.5e-07, "loss": 0.0277, "step": 312 }, { "clip_ratio/high_max": 0.0020471918433031533, "clip_ratio/high_mean": 0.0007705039352003951, "clip_ratio/low_mean": 0.0007479044525098288, "clip_ratio/low_min": 5.0585735152708367e-05, "clip_ratio/region_mean": 0.001518408396805171, "epoch": 3.2332361516034984, "grad_norm": 0.1202983409166336, "learning_rate": 7.5e-07, "loss": -0.019, "step": 313 }, { "clip_ratio/high_max": 0.0019719113952305634, "clip_ratio/high_mean": 0.0007390961418423103, "clip_ratio/low_mean": 0.0006542334331243183, "clip_ratio/low_min": 2.8282101993681863e-05, "clip_ratio/region_mean": 0.0013933296177128796, "epoch": 3.2425655976676384, "grad_norm": 0.12071716040372849, "learning_rate": 7.5e-07, "loss": 0.0072, "step": 314 }, { "clip_ratio/high_max": 0.0019702728604897857, "clip_ratio/high_mean": 0.0008102527790470049, "clip_ratio/low_mean": 0.0006525056232931092, "clip_ratio/low_min": 1.864558544184547e-05, "clip_ratio/region_mean": 0.0014627584314439446, "epoch": 3.2518950437317784, "grad_norm": 0.12857277691364288, "learning_rate": 7.5e-07, "loss": 0.0047, "step": 315 }, { "clip_ratio/high_max": 0.001949415309354663, "clip_ratio/high_mean": 0.0008451886678813025, "clip_ratio/low_mean": 0.0007104216292646015, "clip_ratio/low_min": 4.00970020564273e-05, "clip_ratio/region_mean": 0.0015556103026028723, "epoch": 3.2612244897959184, "grad_norm": 0.13518042862415314, "learning_rate": 7.5e-07, "loss": -0.0049, "step": 316 }, { "clip_ratio/high_max": 0.0018201240854978096, "clip_ratio/high_mean": 0.0007632355027453741, "clip_ratio/low_mean": 0.0007299866811081301, "clip_ratio/low_min": 5.907119884795975e-05, "clip_ratio/region_mean": 0.0014932222256902605, "epoch": 3.2705539358600584, "grad_norm": 0.12182590365409851, "learning_rate": 7.5e-07, "loss": 0.0075, "step": 317 }, { "clip_ratio/high_max": 0.0023851152109273244, "clip_ratio/high_mean": 0.0009219160701832152, "clip_ratio/low_mean": 0.0007921172609712812, "clip_ratio/low_min": 6.133911756478483e-05, "clip_ratio/region_mean": 0.0017140333402494434, "epoch": 3.2798833819241984, "grad_norm": 0.12691453099250793, "learning_rate": 7.5e-07, "loss": 0.002, "step": 318 }, { "clip_ratio/high_max": 0.0018309977058379445, "clip_ratio/high_mean": 0.0007573272250738228, "clip_ratio/low_mean": 0.0008084323953880812, "clip_ratio/low_min": 1.5082046047609765e-05, "clip_ratio/region_mean": 0.0015657595758966636, "epoch": 3.2892128279883384, "grad_norm": 0.12701372802257538, "learning_rate": 7.5e-07, "loss": 0.022, "step": 319 }, { "clip_ratio/high_max": 0.0017502247719676234, "clip_ratio/high_mean": 0.000746136743146053, "clip_ratio/low_mean": 0.0008501998163410462, "clip_ratio/low_min": 6.192805994942319e-05, "clip_ratio/region_mean": 0.001596336551301647, "epoch": 3.298542274052478, "grad_norm": 0.17474320530891418, "learning_rate": 7.5e-07, "loss": 0.0381, "step": 320 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0274135044642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 660.0183715820312, "completions/mean_terminated_length": 563.171142578125, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 3.307871720116618, "grad_norm": 0.1324944645166397, "learning_rate": 7.5e-07, "loss": 0.0013, "num_tokens": 197045666.0, "reward": 0.58837890625, "reward_std": 0.18516971170902252, "rewards/simpleverify_reward/mean": 0.58837890625, "rewards/simpleverify_reward/std": 0.4921443462371826, "step": 321 }, { "clip_ratio/high_max": 0.0020408536001923494, "clip_ratio/high_mean": 0.0008271738734038081, "clip_ratio/low_mean": 0.0006138212675068644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001440995154553093, "epoch": 3.317201166180758, "grad_norm": 0.14645197987556458, "learning_rate": 7.5e-07, "loss": 0.0051, "step": 322 }, { "clip_ratio/high_max": 0.0017765054872143082, "clip_ratio/high_mean": 0.0007376453777396819, "clip_ratio/low_mean": 0.0005300720604282105, "clip_ratio/low_min": 2.978767952299677e-05, "clip_ratio/region_mean": 0.0012677174963755533, "epoch": 3.326530612244898, "grad_norm": 0.13055391609668732, "learning_rate": 7.5e-07, "loss": -0.0314, "step": 323 }, { "clip_ratio/high_max": 0.001923143212479772, "clip_ratio/high_mean": 0.0007403391582556651, "clip_ratio/low_mean": 0.0005221534138399875, "clip_ratio/low_min": 3.202391235390678e-05, "clip_ratio/region_mean": 0.0012624926148419036, "epoch": 3.335860058309038, "grad_norm": 0.1447087824344635, "learning_rate": 7.5e-07, "loss": -0.0032, "step": 324 }, { "clip_ratio/high_max": 0.0017556129387230612, "clip_ratio/high_mean": 0.0007182475765148411, "clip_ratio/low_mean": 0.0007504450068154256, "clip_ratio/low_min": 5.97834596192115e-05, "clip_ratio/region_mean": 0.0014686925896967296, "epoch": 3.345189504373178, "grad_norm": 0.13935120403766632, "learning_rate": 7.5e-07, "loss": 0.0615, "step": 325 }, { "clip_ratio/high_max": 0.0013925420789746568, "clip_ratio/high_mean": 0.0006366157867887523, "clip_ratio/low_mean": 0.0007442613004968734, "clip_ratio/low_min": 2.1147014194866642e-05, "clip_ratio/region_mean": 0.0013808770781906787, "epoch": 3.354518950437318, "grad_norm": 0.14376632869243622, "learning_rate": 7.5e-07, "loss": 0.0379, "step": 326 }, { "clip_ratio/high_max": 0.00220044804882491, "clip_ratio/high_mean": 0.0009887414198601618, "clip_ratio/low_mean": 0.0005466245474963216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015353659473476, "epoch": 3.363848396501458, "grad_norm": 0.12551355361938477, "learning_rate": 7.5e-07, "loss": -0.0409, "step": 327 }, { "clip_ratio/high_max": 0.0019246820957050659, "clip_ratio/high_mean": 0.0008015540333872195, "clip_ratio/low_mean": 0.0006205689551279647, "clip_ratio/low_min": 4.065537086717086e-05, "clip_ratio/region_mean": 0.0014221230085240677, "epoch": 3.373177842565598, "grad_norm": 0.12612026929855347, "learning_rate": 7.5e-07, "loss": 0.0203, "step": 328 }, { "clip_ratio/high_max": 0.0019650150679808576, "clip_ratio/high_mean": 0.0008115865275613032, "clip_ratio/low_mean": 0.0006944798078620806, "clip_ratio/low_min": 7.884321075835032e-05, "clip_ratio/region_mean": 0.001506066349975299, "epoch": 3.3825072886297374, "grad_norm": 0.1157570406794548, "learning_rate": 7.5e-07, "loss": 0.0034, "step": 329 }, { "clip_ratio/high_max": 0.001947930231835926, "clip_ratio/high_mean": 0.000773131514506531, "clip_ratio/low_mean": 0.0008262284882221138, "clip_ratio/low_min": 7.721996462350944e-05, "clip_ratio/region_mean": 0.0015993599954526871, "epoch": 3.3918367346938774, "grad_norm": 0.12292768061161041, "learning_rate": 7.5e-07, "loss": 0.0342, "step": 330 }, { "clip_ratio/high_max": 0.0020198631391394883, "clip_ratio/high_mean": 0.00085424080316443, "clip_ratio/low_mean": 0.0007693568149989005, "clip_ratio/low_min": 8.453296140942257e-05, "clip_ratio/region_mean": 0.001623597607249394, "epoch": 3.4011661807580174, "grad_norm": 0.1528497040271759, "learning_rate": 7.5e-07, "loss": -0.0283, "step": 331 }, { "clip_ratio/high_max": 0.001945412157510873, "clip_ratio/high_mean": 0.0008473087364109233, "clip_ratio/low_mean": 0.0007849500598240411, "clip_ratio/low_min": 4.291444292903179e-05, "clip_ratio/region_mean": 0.0016322587798640598, "epoch": 3.4104956268221573, "grad_norm": 0.1332119107246399, "learning_rate": 7.5e-07, "loss": -0.0422, "step": 332 }, { "clip_ratio/high_max": 0.002070817572530359, "clip_ratio/high_mean": 0.0009339246553281555, "clip_ratio/low_mean": 0.0008211289823520929, "clip_ratio/low_min": 2.3243119358085096e-05, "clip_ratio/region_mean": 0.0017550536213093437, "epoch": 3.4198250728862973, "grad_norm": 0.1416408121585846, "learning_rate": 7.5e-07, "loss": -0.0091, "step": 333 }, { "clip_ratio/high_max": 0.0020664475014200434, "clip_ratio/high_mean": 0.0007862660004320787, "clip_ratio/low_mean": 0.0008407906443608226, "clip_ratio/low_min": 7.486963750125142e-05, "clip_ratio/region_mean": 0.0016270566738967318, "epoch": 3.4291545189504373, "grad_norm": 0.13220922648906708, "learning_rate": 7.5e-07, "loss": 0.0311, "step": 334 }, { "clip_ratio/high_max": 0.002032237174717011, "clip_ratio/high_mean": 0.0008027754902286688, "clip_ratio/low_mean": 0.0006818081346864346, "clip_ratio/low_min": 3.684376588353189e-05, "clip_ratio/region_mean": 0.0014845836303720716, "epoch": 3.4384839650145773, "grad_norm": 0.12150772660970688, "learning_rate": 7.5e-07, "loss": -0.0449, "step": 335 }, { "clip_ratio/high_max": 0.002220571390353143, "clip_ratio/high_mean": 0.0008860628695401829, "clip_ratio/low_mean": 0.0009461607878620271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0018322236865060404, "epoch": 3.4478134110787173, "grad_norm": 0.13061195611953735, "learning_rate": 7.5e-07, "loss": 0.012, "step": 336 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0267857142857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 654.1943359375, "completions/mean_terminated_length": 559.4656982421875, "completions/min_length": 6.0, "completions/min_terminated_length": 6.0, "epoch": 3.4571428571428573, "grad_norm": 0.1344422847032547, "learning_rate": 7.5e-07, "loss": 0.006, "num_tokens": 206304476.0, "reward": 0.5979353189468384, "reward_std": 0.1871853917837143, "rewards/simpleverify_reward/mean": 0.5979352593421936, "rewards/simpleverify_reward/std": 0.4903319478034973, "step": 337 }, { "clip_ratio/high_max": 0.0023465795457013883, "clip_ratio/high_mean": 0.0009717870234453585, "clip_ratio/low_mean": 0.000583972294407431, "clip_ratio/low_min": 3.417266270844266e-05, "clip_ratio/region_mean": 0.0015557592996628955, "epoch": 3.466472303206997, "grad_norm": 0.14808325469493866, "learning_rate": 7.5e-07, "loss": -0.0679, "step": 338 }, { "clip_ratio/high_max": 0.0018345132848480716, "clip_ratio/high_mean": 0.0007638905553903896, "clip_ratio/low_mean": 0.0005637107160509913, "clip_ratio/low_min": 1.991714452742599e-05, "clip_ratio/region_mean": 0.001327601246885024, "epoch": 3.4758017492711373, "grad_norm": 0.13874278962612152, "learning_rate": 7.5e-07, "loss": -0.0298, "step": 339 }, { "clip_ratio/high_max": 0.001889637169369962, "clip_ratio/high_mean": 0.0007220472998596961, "clip_ratio/low_mean": 0.0006471702836279292, "clip_ratio/low_min": 2.88093206108897e-05, "clip_ratio/region_mean": 0.0013692175816686358, "epoch": 3.485131195335277, "grad_norm": 0.13582231104373932, "learning_rate": 7.5e-07, "loss": 0.0003, "step": 340 }, { "clip_ratio/high_max": 0.0020815288517042063, "clip_ratio/high_mean": 0.0007989694313437212, "clip_ratio/low_mean": 0.0007281429534486961, "clip_ratio/low_min": 1.6314277672790922e-05, "clip_ratio/region_mean": 0.0015271123847924173, "epoch": 3.494460641399417, "grad_norm": 0.137525737285614, "learning_rate": 7.5e-07, "loss": 0.026, "step": 341 }, { "clip_ratio/high_max": 0.002018686667724978, "clip_ratio/high_mean": 0.0008825135537335882, "clip_ratio/low_mean": 0.0006562769704032689, "clip_ratio/low_min": 1.1083525350841228e-05, "clip_ratio/region_mean": 0.0015387905441457406, "epoch": 3.503790087463557, "grad_norm": 0.12875990569591522, "learning_rate": 7.5e-07, "loss": 0.0006, "step": 342 }, { "clip_ratio/high_max": 0.001981438155780779, "clip_ratio/high_mean": 0.0008340776230397751, "clip_ratio/low_mean": 0.0007612000226799864, "clip_ratio/low_min": 9.278157631342765e-05, "clip_ratio/region_mean": 0.001595277659362182, "epoch": 3.513119533527697, "grad_norm": 0.13223889470100403, "learning_rate": 7.5e-07, "loss": -0.0172, "step": 343 }, { "clip_ratio/high_max": 0.0020284387646825053, "clip_ratio/high_mean": 0.000836162833365961, "clip_ratio/low_mean": 0.0007717874614172615, "clip_ratio/low_min": 9.012350710690953e-05, "clip_ratio/region_mean": 0.0016079502602224238, "epoch": 3.522448979591837, "grad_norm": 0.24934150278568268, "learning_rate": 7.5e-07, "loss": 0.0283, "step": 344 }, { "clip_ratio/high_max": 0.002003703404625412, "clip_ratio/high_mean": 0.0007513312448281795, "clip_ratio/low_mean": 0.0007288294800673611, "clip_ratio/low_min": 4.118524520890787e-05, "clip_ratio/region_mean": 0.0014801607394474559, "epoch": 3.5317784256559768, "grad_norm": 0.144235759973526, "learning_rate": 7.5e-07, "loss": 0.0144, "step": 345 }, { "clip_ratio/high_max": 0.0019255937513662502, "clip_ratio/high_mean": 0.000710349146174849, "clip_ratio/low_mean": 0.000803835820988752, "clip_ratio/low_min": 3.2653840662533185e-05, "clip_ratio/region_mean": 0.0015141850090003572, "epoch": 3.5411078717201168, "grad_norm": 0.14445433020591736, "learning_rate": 7.5e-07, "loss": 0.0546, "step": 346 }, { "clip_ratio/high_max": 0.001994387370359618, "clip_ratio/high_mean": 0.0008598837284807814, "clip_ratio/low_mean": 0.0007677968915231759, "clip_ratio/low_min": 9.887712622003164e-05, "clip_ratio/region_mean": 0.001627680627279915, "epoch": 3.5504373177842563, "grad_norm": 0.1266629546880722, "learning_rate": 7.5e-07, "loss": -0.0065, "step": 347 }, { "clip_ratio/high_max": 0.002005260394071229, "clip_ratio/high_mean": 0.0008747828560444759, "clip_ratio/low_mean": 0.0007431800859194482, "clip_ratio/low_min": 2.8788577765226364e-05, "clip_ratio/region_mean": 0.001617962945601903, "epoch": 3.5597667638483967, "grad_norm": 0.12730680406093597, "learning_rate": 7.5e-07, "loss": -0.0235, "step": 348 }, { "clip_ratio/high_max": 0.002305332563992124, "clip_ratio/high_mean": 0.0010211733824689873, "clip_ratio/low_mean": 0.0008076394706222345, "clip_ratio/low_min": 2.4373609448957723e-05, "clip_ratio/region_mean": 0.0018288128412677906, "epoch": 3.5690962099125363, "grad_norm": 0.13796520233154297, "learning_rate": 7.5e-07, "loss": -0.0193, "step": 349 }, { "clip_ratio/high_max": 0.002053395175607875, "clip_ratio/high_mean": 0.0008073165354289813, "clip_ratio/low_mean": 0.000962716483627446, "clip_ratio/low_min": 0.00013535761991079198, "clip_ratio/region_mean": 0.0017700329990475439, "epoch": 3.5784256559766763, "grad_norm": 0.14047159254550934, "learning_rate": 7.5e-07, "loss": 0.0209, "step": 350 }, { "clip_ratio/high_max": 0.0019117862248094752, "clip_ratio/high_mean": 0.0007680230064579519, "clip_ratio/low_mean": 0.0007336527123698033, "clip_ratio/low_min": 1.771039933373686e-05, "clip_ratio/region_mean": 0.0015016757606645115, "epoch": 3.5877551020408163, "grad_norm": 0.12009663879871368, "learning_rate": 7.5e-07, "loss": -0.0034, "step": 351 }, { "clip_ratio/high_max": 0.0018528514337958768, "clip_ratio/high_mean": 0.0007816444849595428, "clip_ratio/low_mean": 0.0007175337177613983, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001499178193625994, "epoch": 3.5970845481049563, "grad_norm": 0.12438838928937912, "learning_rate": 7.5e-07, "loss": -0.0244, "step": 352 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.026157924107142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 659.2789916992188, "completions/mean_terminated_length": 566.9667358398438, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 3.6064139941690962, "grad_norm": 0.13598443567752838, "learning_rate": 7.5e-07, "loss": 0.01, "num_tokens": 215617683.0, "reward": 0.6077009439468384, "reward_std": 0.18252407014369965, "rewards/simpleverify_reward/mean": 0.6077008843421936, "rewards/simpleverify_reward/std": 0.4882797598838806, "step": 353 }, { "clip_ratio/high_max": 0.0015766473188705277, "clip_ratio/high_mean": 0.0007032620269455947, "clip_ratio/low_mean": 0.0005538996119867079, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012571616243803874, "epoch": 3.6157434402332362, "grad_norm": 0.13026034832000732, "learning_rate": 7.5e-07, "loss": 0.0062, "step": 354 }, { "clip_ratio/high_max": 0.0019087456239503808, "clip_ratio/high_mean": 0.0008195007085305406, "clip_ratio/low_mean": 0.0005515924290193652, "clip_ratio/low_min": 6.778470105928136e-05, "clip_ratio/region_mean": 0.0013710931270907167, "epoch": 3.6250728862973762, "grad_norm": 0.15417174994945526, "learning_rate": 7.5e-07, "loss": 0.0161, "step": 355 }, { "clip_ratio/high_max": 0.002163552329875529, "clip_ratio/high_mean": 0.0008665172990731662, "clip_ratio/low_mean": 0.0005049877881901921, "clip_ratio/low_min": 1.5056612937769387e-05, "clip_ratio/region_mean": 0.0013715050808968954, "epoch": 3.6344023323615158, "grad_norm": 0.13486723601818085, "learning_rate": 7.5e-07, "loss": -0.0285, "step": 356 }, { "clip_ratio/high_max": 0.0017331200215267017, "clip_ratio/high_mean": 0.0007762460518279113, "clip_ratio/low_mean": 0.0005409514051279984, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013171974860597402, "epoch": 3.643731778425656, "grad_norm": 0.1226663663983345, "learning_rate": 7.5e-07, "loss": -0.0205, "step": 357 }, { "clip_ratio/high_max": 0.0017271683173021302, "clip_ratio/high_mean": 0.0007484573934561922, "clip_ratio/low_mean": 0.0006355204295687145, "clip_ratio/low_min": 2.5135333999060094e-05, "clip_ratio/region_mean": 0.0013839778512192424, "epoch": 3.6530612244897958, "grad_norm": 0.16101983189582825, "learning_rate": 7.5e-07, "loss": 0.0078, "step": 358 }, { "clip_ratio/high_max": 0.0020706332034023944, "clip_ratio/high_mean": 0.0008138049652188784, "clip_ratio/low_mean": 0.0004990708739569527, "clip_ratio/low_min": 2.484946253389353e-05, "clip_ratio/region_mean": 0.0013128758946550079, "epoch": 3.6623906705539357, "grad_norm": 0.13246366381645203, "learning_rate": 7.5e-07, "loss": -0.0395, "step": 359 }, { "clip_ratio/high_max": 0.0018872195796575397, "clip_ratio/high_mean": 0.0006717592859786237, "clip_ratio/low_mean": 0.000661975239381718, "clip_ratio/low_min": 1.1170687685080338e-05, "clip_ratio/region_mean": 0.0013337345044419635, "epoch": 3.6717201166180757, "grad_norm": 0.1271660029888153, "learning_rate": 7.5e-07, "loss": 0.0209, "step": 360 }, { "clip_ratio/high_max": 0.002351716066186782, "clip_ratio/high_mean": 0.0008686843957548263, "clip_ratio/low_mean": 0.0005592133147729328, "clip_ratio/low_min": 3.642664432845777e-05, "clip_ratio/region_mean": 0.0014278977178037167, "epoch": 3.6810495626822157, "grad_norm": 0.12068501859903336, "learning_rate": 7.5e-07, "loss": -0.0196, "step": 361 }, { "clip_ratio/high_max": 0.002055466320598498, "clip_ratio/high_mean": 0.0007883622256485978, "clip_ratio/low_mean": 0.0007106574103090679, "clip_ratio/low_min": 3.342028776387451e-05, "clip_ratio/region_mean": 0.0014990196250437293, "epoch": 3.6903790087463557, "grad_norm": 0.13293927907943726, "learning_rate": 7.5e-07, "loss": -0.0154, "step": 362 }, { "clip_ratio/high_max": 0.0018148874514736235, "clip_ratio/high_mean": 0.0007713978625361051, "clip_ratio/low_mean": 0.0007252979830809636, "clip_ratio/low_min": 1.6162399333552457e-05, "clip_ratio/region_mean": 0.0014966958078730386, "epoch": 3.6997084548104957, "grad_norm": 0.1219753548502922, "learning_rate": 7.5e-07, "loss": -0.0006, "step": 363 }, { "clip_ratio/high_max": 0.0020816157484659925, "clip_ratio/high_mean": 0.0008720670539332787, "clip_ratio/low_mean": 0.0007179662534326781, "clip_ratio/low_min": 4.4215807065484114e-05, "clip_ratio/region_mean": 0.0015900333237368613, "epoch": 3.7090379008746357, "grad_norm": 0.14350813627243042, "learning_rate": 7.5e-07, "loss": -0.0143, "step": 364 }, { "clip_ratio/high_max": 0.0018772338335111272, "clip_ratio/high_mean": 0.0007655775843886659, "clip_ratio/low_mean": 0.0008028314277908066, "clip_ratio/low_min": 2.302025859535206e-05, "clip_ratio/region_mean": 0.0015684089958085679, "epoch": 3.7183673469387752, "grad_norm": 0.12387395650148392, "learning_rate": 7.5e-07, "loss": 0.0242, "step": 365 }, { "clip_ratio/high_max": 0.0017953253773157485, "clip_ratio/high_mean": 0.0007928287668619305, "clip_ratio/low_mean": 0.0007264193609444192, "clip_ratio/low_min": 5.4395834922615904e-05, "clip_ratio/region_mean": 0.001519248093245551, "epoch": 3.7276967930029157, "grad_norm": 0.1345619112253189, "learning_rate": 7.5e-07, "loss": -0.0216, "step": 366 }, { "clip_ratio/high_max": 0.0018638392903085332, "clip_ratio/high_mean": 0.0007222330441436497, "clip_ratio/low_mean": 0.000804279883595882, "clip_ratio/low_min": 4.2582800233503804e-05, "clip_ratio/region_mean": 0.001526512911368627, "epoch": 3.7370262390670552, "grad_norm": 0.13340014219284058, "learning_rate": 7.5e-07, "loss": 0.0405, "step": 367 }, { "clip_ratio/high_max": 0.0019713456385943573, "clip_ratio/high_mean": 0.0007908247062005103, "clip_ratio/low_mean": 0.0007834406333131483, "clip_ratio/low_min": 4.885299131274223e-05, "clip_ratio/region_mean": 0.0015742653667984996, "epoch": 3.746355685131195, "grad_norm": 0.1192748099565506, "learning_rate": 7.5e-07, "loss": -0.0041, "step": 368 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0316685267857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 681.4520874023438, "completions/mean_terminated_length": 569.7819213867188, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 3.755685131195335, "grad_norm": 0.13231417536735535, "learning_rate": 7.5e-07, "loss": -0.0086, "num_tokens": 224915604.0, "reward": 0.61181640625, "reward_std": 0.17692798376083374, "rewards/simpleverify_reward/mean": 0.61181640625, "rewards/simpleverify_reward/std": 0.48735371232032776, "step": 369 }, { "clip_ratio/high_max": 0.0019644650383270346, "clip_ratio/high_mean": 0.0008238172777055297, "clip_ratio/low_mean": 0.000473721473099431, "clip_ratio/low_min": 1.6617921573924832e-05, "clip_ratio/region_mean": 0.0012975387398910243, "epoch": 3.765014577259475, "grad_norm": 0.14271745085716248, "learning_rate": 7.5e-07, "loss": -0.0376, "step": 370 }, { "clip_ratio/high_max": 0.001843410216679331, "clip_ratio/high_mean": 0.0006908777377248043, "clip_ratio/low_mean": 0.0005936674306212808, "clip_ratio/low_min": 1.323311425949214e-05, "clip_ratio/region_mean": 0.001284545211092336, "epoch": 3.774344023323615, "grad_norm": 0.14379075169563293, "learning_rate": 7.5e-07, "loss": 0.0222, "step": 371 }, { "clip_ratio/high_max": 0.0017523520800750703, "clip_ratio/high_mean": 0.0008184082225852762, "clip_ratio/low_mean": 0.00047561468454659916, "clip_ratio/low_min": 1.3498920452548191e-05, "clip_ratio/region_mean": 0.0012940229062223807, "epoch": 3.783673469387755, "grad_norm": 0.1188713014125824, "learning_rate": 7.5e-07, "loss": -0.0271, "step": 372 }, { "clip_ratio/high_max": 0.0019285304842924234, "clip_ratio/high_mean": 0.000739922027605644, "clip_ratio/low_mean": 0.0005927263027842855, "clip_ratio/low_min": 1.8317701687919907e-05, "clip_ratio/region_mean": 0.0013326483058335725, "epoch": 3.793002915451895, "grad_norm": 0.1336059719324112, "learning_rate": 7.5e-07, "loss": -0.018, "step": 373 }, { "clip_ratio/high_max": 0.0014114125551714096, "clip_ratio/high_mean": 0.0005018692381781875, "clip_ratio/low_mean": 0.000713205166903208, "clip_ratio/low_min": 4.2642707740014885e-05, "clip_ratio/region_mean": 0.0012150744114478584, "epoch": 3.8023323615160347, "grad_norm": 0.12717552483081818, "learning_rate": 7.5e-07, "loss": 0.0522, "step": 374 }, { "clip_ratio/high_max": 0.0018655980384210125, "clip_ratio/high_mean": 0.0007273678329511313, "clip_ratio/low_mean": 0.0005839056393597275, "clip_ratio/low_min": 3.441057378950063e-05, "clip_ratio/region_mean": 0.0013112734704918694, "epoch": 3.811661807580175, "grad_norm": 0.13825935125350952, "learning_rate": 7.5e-07, "loss": -0.0055, "step": 375 }, { "clip_ratio/high_max": 0.0018788106099236757, "clip_ratio/high_mean": 0.0007702998937020311, "clip_ratio/low_mean": 0.000673764358907647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014440642662520986, "epoch": 3.8209912536443147, "grad_norm": 0.1377682089805603, "learning_rate": 7.5e-07, "loss": 0.0174, "step": 376 }, { "clip_ratio/high_max": 0.0017917483492055908, "clip_ratio/high_mean": 0.0007475980473827804, "clip_ratio/low_mean": 0.0006306117556960089, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013782098103547469, "epoch": 3.8303206997084547, "grad_norm": 0.13490578532218933, "learning_rate": 7.5e-07, "loss": -0.0106, "step": 377 }, { "clip_ratio/high_max": 0.0018072171224048361, "clip_ratio/high_mean": 0.0006836191787442658, "clip_ratio/low_mean": 0.0006929916926310398, "clip_ratio/low_min": 1.449107367079705e-05, "clip_ratio/region_mean": 0.0013766108713753056, "epoch": 3.8396501457725947, "grad_norm": 0.12909512221813202, "learning_rate": 7.5e-07, "loss": 0.0319, "step": 378 }, { "clip_ratio/high_max": 0.0019326410838402808, "clip_ratio/high_mean": 0.0008257864064944442, "clip_ratio/low_mean": 0.0005582642070294241, "clip_ratio/low_min": 1.5307372450479306e-05, "clip_ratio/region_mean": 0.001384050596243469, "epoch": 3.8489795918367347, "grad_norm": 0.13249970972537994, "learning_rate": 7.5e-07, "loss": -0.0298, "step": 379 }, { "clip_ratio/high_max": 0.00214029321068665, "clip_ratio/high_mean": 0.0008107530829875031, "clip_ratio/low_mean": 0.0007437082585965982, "clip_ratio/low_min": 5.287838393996935e-05, "clip_ratio/region_mean": 0.001554461330670165, "epoch": 3.8583090379008746, "grad_norm": 0.13859547674655914, "learning_rate": 7.5e-07, "loss": -0.028, "step": 380 }, { "clip_ratio/high_max": 0.0018411909113638103, "clip_ratio/high_mean": 0.0007657269725314109, "clip_ratio/low_mean": 0.000762123980166507, "clip_ratio/low_min": 1.0305028808943462e-05, "clip_ratio/region_mean": 0.0015278509526979178, "epoch": 3.8676384839650146, "grad_norm": 0.12644539773464203, "learning_rate": 7.5e-07, "loss": -0.0087, "step": 381 }, { "clip_ratio/high_max": 0.0017565360758453608, "clip_ratio/high_mean": 0.0007580902984045679, "clip_ratio/low_mean": 0.0007181239752753754, "clip_ratio/low_min": 4.775549314217642e-05, "clip_ratio/region_mean": 0.0014762142454856075, "epoch": 3.8769679300291546, "grad_norm": 0.13256795704364777, "learning_rate": 7.5e-07, "loss": -0.0262, "step": 382 }, { "clip_ratio/high_max": 0.0018649377816473134, "clip_ratio/high_mean": 0.0007608426749357022, "clip_ratio/low_mean": 0.0007160243349062512, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014768669934710488, "epoch": 3.8862973760932946, "grad_norm": 0.1312928944826126, "learning_rate": 7.5e-07, "loss": -0.0112, "step": 383 }, { "clip_ratio/high_max": 0.001982178622711217, "clip_ratio/high_mean": 0.0007522262767452048, "clip_ratio/low_mean": 0.0007842488485039212, "clip_ratio/low_min": 6.428370397770777e-05, "clip_ratio/region_mean": 0.0015364751452580094, "epoch": 3.8956268221574346, "grad_norm": 0.12795305252075195, "learning_rate": 7.5e-07, "loss": -0.0075, "step": 384 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029645647321428603, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 682.4894409179688, "completions/mean_terminated_length": 578.2020263671875, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 4.0093294460641395, "grad_norm": 0.1382928490638733, "learning_rate": 7.5e-07, "loss": 0.0214, "num_tokens": 234374444.0, "reward": 0.5992606282234192, "reward_std": 0.1825239360332489, "rewards/simpleverify_reward/mean": 0.5992606282234192, "rewards/simpleverify_reward/std": 0.49006539583206177, "step": 385 }, { "clip_ratio/high_max": 0.0020337376154202502, "clip_ratio/high_mean": 0.0008235742352553643, "clip_ratio/low_mean": 0.0005084305576019688, "clip_ratio/low_min": 2.9480052035069093e-05, "clip_ratio/region_mean": 0.0013320048165041953, "epoch": 4.01865889212828, "grad_norm": 0.1386559158563614, "learning_rate": 7.5e-07, "loss": 0.0083, "step": 386 }, { "clip_ratio/high_max": 0.002207613149948884, "clip_ratio/high_mean": 0.0008165582348738099, "clip_ratio/low_mean": 0.0005629440793200047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013795022859994788, "epoch": 4.0279883381924195, "grad_norm": 0.12692034244537354, "learning_rate": 7.5e-07, "loss": -0.0136, "step": 387 }, { "clip_ratio/high_max": 0.0017830231008701958, "clip_ratio/high_mean": 0.0007365793917415431, "clip_ratio/low_mean": 0.0005121038457218674, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012486832347349264, "epoch": 4.03731778425656, "grad_norm": 0.1391746997833252, "learning_rate": 7.5e-07, "loss": -0.0023, "step": 388 }, { "clip_ratio/high_max": 0.0019063666004512925, "clip_ratio/high_mean": 0.0007735318577033468, "clip_ratio/low_mean": 0.0005552527818508679, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013287846559251193, "epoch": 4.0466472303206995, "grad_norm": 0.13901612162590027, "learning_rate": 7.5e-07, "loss": 0.0166, "step": 389 }, { "clip_ratio/high_max": 0.00189041834892123, "clip_ratio/high_mean": 0.0007066566304274602, "clip_ratio/low_mean": 0.0006235961818674696, "clip_ratio/low_min": 1.5277439160854556e-05, "clip_ratio/region_mean": 0.0013302527913765516, "epoch": 4.05597667638484, "grad_norm": 0.13058783113956451, "learning_rate": 7.5e-07, "loss": 0.0155, "step": 390 }, { "clip_ratio/high_max": 0.0021190235602261964, "clip_ratio/high_mean": 0.0008214493600462447, "clip_ratio/low_mean": 0.0005265035360935144, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001347952875221381, "epoch": 4.0653061224489795, "grad_norm": 0.12480573356151581, "learning_rate": 7.5e-07, "loss": -0.0081, "step": 391 }, { "clip_ratio/high_max": 0.0024153353879228234, "clip_ratio/high_mean": 0.0009358988463645801, "clip_ratio/low_mean": 0.0005842661166752805, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00152016498032026, "epoch": 4.07463556851312, "grad_norm": 0.1288413256406784, "learning_rate": 7.5e-07, "loss": -0.0552, "step": 392 }, { "clip_ratio/high_max": 0.0019335723445692565, "clip_ratio/high_mean": 0.0008156704861903563, "clip_ratio/low_mean": 0.0006123091898189159, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001427979706932092, "epoch": 4.0839650145772595, "grad_norm": 0.14116966724395752, "learning_rate": 7.5e-07, "loss": 0.0105, "step": 393 }, { "clip_ratio/high_max": 0.0020327543461462483, "clip_ratio/high_mean": 0.0008651519056002144, "clip_ratio/low_mean": 0.0006236663466552272, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014888182558934204, "epoch": 4.093294460641399, "grad_norm": 0.1435943841934204, "learning_rate": 7.5e-07, "loss": -0.0328, "step": 394 }, { "clip_ratio/high_max": 0.002007888218940934, "clip_ratio/high_mean": 0.0007571179594378918, "clip_ratio/low_mean": 0.0007323446716327453, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001489462607423775, "epoch": 4.1026239067055394, "grad_norm": 0.13382846117019653, "learning_rate": 7.5e-07, "loss": 0.0211, "step": 395 }, { "clip_ratio/high_max": 0.001965460349310888, "clip_ratio/high_mean": 0.0007913763129181461, "clip_ratio/low_mean": 0.0006534463545904146, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014448226975218859, "epoch": 4.111953352769679, "grad_norm": 0.13430066406726837, "learning_rate": 7.5e-07, "loss": -0.0292, "step": 396 }, { "clip_ratio/high_max": 0.0021421784404083155, "clip_ratio/high_mean": 0.0009015498289954849, "clip_ratio/low_mean": 0.0006883087644382613, "clip_ratio/low_min": 5.730904376832768e-05, "clip_ratio/region_mean": 0.0015898586207185872, "epoch": 4.121282798833819, "grad_norm": 0.13841068744659424, "learning_rate": 7.5e-07, "loss": 0.0055, "step": 397 }, { "clip_ratio/high_max": 0.001823792037612293, "clip_ratio/high_mean": 0.0007137648390198592, "clip_ratio/low_mean": 0.0007224725459309411, "clip_ratio/low_min": 6.70222143526189e-05, "clip_ratio/region_mean": 0.0014362373876792844, "epoch": 4.130612244897959, "grad_norm": 0.13640277087688446, "learning_rate": 7.5e-07, "loss": 0.0176, "step": 398 }, { "clip_ratio/high_max": 0.0019104735256405547, "clip_ratio/high_mean": 0.0008109175105346367, "clip_ratio/low_mean": 0.0006757436112820869, "clip_ratio/low_min": 4.812593215319794e-05, "clip_ratio/region_mean": 0.001486661123635713, "epoch": 4.139941690962099, "grad_norm": 0.12342701852321625, "learning_rate": 7.5e-07, "loss": 0.0162, "step": 399 }, { "clip_ratio/high_max": 0.002469293787726201, "clip_ratio/high_mean": 0.0009702953648229595, "clip_ratio/low_mean": 0.0007595020178996492, "clip_ratio/low_min": 1.468170103180455e-05, "clip_ratio/region_mean": 0.001729797415464418, "epoch": 4.149271137026239, "grad_norm": 0.14233455061912537, "learning_rate": 7.5e-07, "loss": -0.0362, "step": 400 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0311802455357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3928.0, "completions/mean_length": 690.3782348632812, "completions/mean_terminated_length": 580.7725219726562, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 4.158600583090379, "grad_norm": 0.15243451297283173, "learning_rate": 7.5e-07, "loss": -0.0088, "num_tokens": 243881050.0, "reward": 0.6032366156578064, "reward_std": 0.18902812898159027, "rewards/simpleverify_reward/mean": 0.6032366156578064, "rewards/simpleverify_reward/std": 0.48924317955970764, "step": 401 }, { "clip_ratio/high_max": 0.0021684690545953345, "clip_ratio/high_mean": 0.0008246731649705907, "clip_ratio/low_mean": 0.0005357108693715418, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013603840307041537, "epoch": 4.167930029154519, "grad_norm": 0.14567749202251434, "learning_rate": 7.5e-07, "loss": -0.0263, "step": 402 }, { "clip_ratio/high_max": 0.001954484723682981, "clip_ratio/high_mean": 0.0007532408671977464, "clip_ratio/low_mean": 0.0006956215629543294, "clip_ratio/low_min": 4.975156480213627e-05, "clip_ratio/region_mean": 0.001448862411052687, "epoch": 4.1772594752186585, "grad_norm": 0.1540350764989853, "learning_rate": 7.5e-07, "loss": 0.0413, "step": 403 }, { "clip_ratio/high_max": 0.0017985033700824715, "clip_ratio/high_mean": 0.0007182301560533233, "clip_ratio/low_mean": 0.0004975067795385257, "clip_ratio/low_min": 2.203808253398165e-05, "clip_ratio/region_mean": 0.001215736923768418, "epoch": 4.186588921282799, "grad_norm": 0.12910836935043335, "learning_rate": 7.5e-07, "loss": 0.0022, "step": 404 }, { "clip_ratio/high_max": 0.0018321140232728794, "clip_ratio/high_mean": 0.0007579278590128524, "clip_ratio/low_mean": 0.0006666876843155478, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014246155296859797, "epoch": 4.1959183673469385, "grad_norm": 0.1257396787405014, "learning_rate": 7.5e-07, "loss": 0.0113, "step": 405 }, { "clip_ratio/high_max": 0.0017905656750372145, "clip_ratio/high_mean": 0.0007175864593591541, "clip_ratio/low_mean": 0.000701015741469746, "clip_ratio/low_min": 2.2969022211327683e-05, "clip_ratio/region_mean": 0.0014186022090143524, "epoch": 4.205247813411079, "grad_norm": 0.13220834732055664, "learning_rate": 7.5e-07, "loss": 0.0153, "step": 406 }, { "clip_ratio/high_max": 0.002282168974488741, "clip_ratio/high_mean": 0.0008694540265423711, "clip_ratio/low_mean": 0.0007036248462100048, "clip_ratio/low_min": 3.8355038668669295e-05, "clip_ratio/region_mean": 0.0015730788618384395, "epoch": 4.214577259475218, "grad_norm": 0.15366628766059875, "learning_rate": 7.5e-07, "loss": -0.0319, "step": 407 }, { "clip_ratio/high_max": 0.001948871067725122, "clip_ratio/high_mean": 0.0008002959602890769, "clip_ratio/low_mean": 0.0006531482067657635, "clip_ratio/low_min": 5.7503888456267305e-05, "clip_ratio/region_mean": 0.001453444165235851, "epoch": 4.223906705539359, "grad_norm": 0.13512006402015686, "learning_rate": 7.5e-07, "loss": -0.0052, "step": 408 }, { "clip_ratio/high_max": 0.0021602295582852094, "clip_ratio/high_mean": 0.0007799872246323503, "clip_ratio/low_mean": 0.00083620797158801, "clip_ratio/low_min": 3.6671230191132054e-05, "clip_ratio/region_mean": 0.0016161951934918761, "epoch": 4.233236151603498, "grad_norm": 0.13306686282157898, "learning_rate": 7.5e-07, "loss": 0.0202, "step": 409 }, { "clip_ratio/high_max": 0.00202434773746063, "clip_ratio/high_mean": 0.0007734409100521589, "clip_ratio/low_mean": 0.000820068278699182, "clip_ratio/low_min": 7.848880886740517e-05, "clip_ratio/region_mean": 0.001593509216036182, "epoch": 4.242565597667639, "grad_norm": 0.12891149520874023, "learning_rate": 7.5e-07, "loss": 0.0289, "step": 410 }, { "clip_ratio/high_max": 0.0018556145296315663, "clip_ratio/high_mean": 0.0007048051338642836, "clip_ratio/low_mean": 0.0007296068524738075, "clip_ratio/low_min": 3.531571837811498e-05, "clip_ratio/region_mean": 0.0014344119736051653, "epoch": 4.251895043731778, "grad_norm": 0.13976530730724335, "learning_rate": 7.5e-07, "loss": 0.0201, "step": 411 }, { "clip_ratio/high_max": 0.0021241668255242985, "clip_ratio/high_mean": 0.0008323817255586619, "clip_ratio/low_mean": 0.0006973180024942849, "clip_ratio/low_min": 6.569235483766533e-05, "clip_ratio/region_mean": 0.001529699788079597, "epoch": 4.261224489795918, "grad_norm": 0.12604692578315735, "learning_rate": 7.5e-07, "loss": -0.0003, "step": 412 }, { "clip_ratio/high_max": 0.0023105514810595196, "clip_ratio/high_mean": 0.0009074083482119022, "clip_ratio/low_mean": 0.0006666678982583107, "clip_ratio/low_min": 1.242544749402441e-05, "clip_ratio/region_mean": 0.0015740762537461706, "epoch": 4.270553935860058, "grad_norm": 0.13396592438220978, "learning_rate": 7.5e-07, "loss": -0.0412, "step": 413 }, { "clip_ratio/high_max": 0.0022212121475604363, "clip_ratio/high_mean": 0.0009421261092938948, "clip_ratio/low_mean": 0.000771273404097883, "clip_ratio/low_min": 2.8915547773067374e-05, "clip_ratio/region_mean": 0.0017133995424956083, "epoch": 4.279883381924198, "grad_norm": 0.15584152936935425, "learning_rate": 7.5e-07, "loss": -0.0061, "step": 414 }, { "clip_ratio/high_max": 0.0020955473228241317, "clip_ratio/high_mean": 0.0008972993982752087, "clip_ratio/low_mean": 0.0008828031204757281, "clip_ratio/low_min": 7.174708025559084e-05, "clip_ratio/region_mean": 0.001780102524207905, "epoch": 4.289212827988338, "grad_norm": 0.14774790406227112, "learning_rate": 7.5e-07, "loss": -0.0051, "step": 415 }, { "clip_ratio/high_max": 0.001986980583751574, "clip_ratio/high_mean": 0.00090591206389945, "clip_ratio/low_mean": 0.0008719347169972025, "clip_ratio/low_min": 7.18762275937479e-05, "clip_ratio/region_mean": 0.00177784678817261, "epoch": 4.298542274052478, "grad_norm": 0.13610081374645233, "learning_rate": 7.5e-07, "loss": -0.014, "step": 416 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029017857142857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 671.0411987304688, "completions/mean_terminated_length": 568.68603515625, "completions/min_length": 84.0, "completions/min_terminated_length": 84.0, "epoch": 4.307871720116618, "grad_norm": 0.12808267772197723, "learning_rate": 7.5e-07, "loss": -0.018, "num_tokens": 253246256.0, "reward": 0.6139090657234192, "reward_std": 0.17434468865394592, "rewards/simpleverify_reward/mean": 0.6139090657234192, "rewards/simpleverify_reward/std": 0.48686879873275757, "step": 417 }, { "clip_ratio/high_max": 0.0017126608909165952, "clip_ratio/high_mean": 0.0006861167676106561, "clip_ratio/low_mean": 0.000533805153281719, "clip_ratio/low_min": 2.7929992938879877e-05, "clip_ratio/region_mean": 0.0012199219236208592, "epoch": 4.317201166180758, "grad_norm": 0.18599529564380646, "learning_rate": 7.5e-07, "loss": 0.0186, "step": 418 }, { "clip_ratio/high_max": 0.0018000271447817795, "clip_ratio/high_mean": 0.0007396372275252361, "clip_ratio/low_mean": 0.0005373870053517749, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012770242283295374, "epoch": 4.326530612244898, "grad_norm": 0.1431349664926529, "learning_rate": 7.5e-07, "loss": -0.0038, "step": 419 }, { "clip_ratio/high_max": 0.001713112087600166, "clip_ratio/high_mean": 0.0007617964674864197, "clip_ratio/low_mean": 0.0005320179061527597, "clip_ratio/low_min": 2.5145187464659102e-05, "clip_ratio/region_mean": 0.0012938143408973701, "epoch": 4.335860058309038, "grad_norm": 0.1235693171620369, "learning_rate": 7.5e-07, "loss": -0.0083, "step": 420 }, { "clip_ratio/high_max": 0.0017597593978280202, "clip_ratio/high_mean": 0.0007126762111511198, "clip_ratio/low_mean": 0.0005556661562877707, "clip_ratio/low_min": 1.382743357680738e-05, "clip_ratio/region_mean": 0.001268342344701523, "epoch": 4.345189504373177, "grad_norm": 0.13228778541088104, "learning_rate": 7.5e-07, "loss": 0.0076, "step": 421 }, { "clip_ratio/high_max": 0.0022516716708196327, "clip_ratio/high_mean": 0.0009039502328960225, "clip_ratio/low_mean": 0.0005673057976309792, "clip_ratio/low_min": 2.7676347599481232e-05, "clip_ratio/region_mean": 0.0014712559895997401, "epoch": 4.354518950437318, "grad_norm": 0.1359865814447403, "learning_rate": 7.5e-07, "loss": -0.0032, "step": 422 }, { "clip_ratio/high_max": 0.0019588229079090524, "clip_ratio/high_mean": 0.0008701923688931856, "clip_ratio/low_mean": 0.0005431635713648575, "clip_ratio/low_min": 4.3463143811095506e-05, "clip_ratio/region_mean": 0.001413355905242497, "epoch": 4.363848396501457, "grad_norm": 0.1361052244901657, "learning_rate": 7.5e-07, "loss": -0.0475, "step": 423 }, { "clip_ratio/high_max": 0.0018365352516411804, "clip_ratio/high_mean": 0.0007362661126535386, "clip_ratio/low_mean": 0.000574919866721757, "clip_ratio/low_min": 1.948861790879164e-05, "clip_ratio/region_mean": 0.0013111859661876224, "epoch": 4.373177842565598, "grad_norm": 0.14038214087486267, "learning_rate": 7.5e-07, "loss": -0.0246, "step": 424 }, { "clip_ratio/high_max": 0.0018260803517478053, "clip_ratio/high_mean": 0.0007264127871167148, "clip_ratio/low_mean": 0.0006578684806299862, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013842812659277115, "epoch": 4.382507288629737, "grad_norm": 0.1267143189907074, "learning_rate": 7.5e-07, "loss": 0.0014, "step": 425 }, { "clip_ratio/high_max": 0.002087468223180622, "clip_ratio/high_mean": 0.0008466613198834239, "clip_ratio/low_mean": 0.0006693988580082078, "clip_ratio/low_min": 1.1922929843422025e-05, "clip_ratio/region_mean": 0.0015160601906245574, "epoch": 4.391836734693878, "grad_norm": 0.12272098660469055, "learning_rate": 7.5e-07, "loss": -0.0394, "step": 426 }, { "clip_ratio/high_max": 0.002035418430750724, "clip_ratio/high_mean": 0.0007714430612395518, "clip_ratio/low_mean": 0.0007846140124456724, "clip_ratio/low_min": 6.458000643760897e-05, "clip_ratio/region_mean": 0.0015560570718662348, "epoch": 4.401166180758017, "grad_norm": 0.1267683357000351, "learning_rate": 7.5e-07, "loss": 0.0188, "step": 427 }, { "clip_ratio/high_max": 0.002001537177420687, "clip_ratio/high_mean": 0.000823317575850524, "clip_ratio/low_mean": 0.0007004262370173819, "clip_ratio/low_min": 6.369276525219902e-05, "clip_ratio/region_mean": 0.0015237438274198212, "epoch": 4.410495626822158, "grad_norm": 0.14416974782943726, "learning_rate": 7.5e-07, "loss": -0.0001, "step": 428 }, { "clip_ratio/high_max": 0.0019214435051253531, "clip_ratio/high_mean": 0.0007905766397016123, "clip_ratio/low_mean": 0.0007845610080039478, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001575137644977076, "epoch": 4.419825072886297, "grad_norm": 0.1360076367855072, "learning_rate": 7.5e-07, "loss": -0.027, "step": 429 }, { "clip_ratio/high_max": 0.002063992687908467, "clip_ratio/high_mean": 0.0007806148914824007, "clip_ratio/low_mean": 0.0006156288554848288, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001396243751514703, "epoch": 4.429154518950437, "grad_norm": 0.12482333183288574, "learning_rate": 7.5e-07, "loss": 0.0087, "step": 430 }, { "clip_ratio/high_max": 0.0021279731881804764, "clip_ratio/high_mean": 0.0008498634233546909, "clip_ratio/low_mean": 0.000764104564950685, "clip_ratio/low_min": 1.444752615498146e-05, "clip_ratio/region_mean": 0.0016139679937623441, "epoch": 4.438483965014577, "grad_norm": 0.14716611802577972, "learning_rate": 7.5e-07, "loss": 0.0002, "step": 431 }, { "clip_ratio/high_max": 0.002077570588880917, "clip_ratio/high_mean": 0.0007924578058009502, "clip_ratio/low_mean": 0.0007744689573883079, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015669267559133004, "epoch": 4.447813411078717, "grad_norm": 0.127461239695549, "learning_rate": 7.5e-07, "loss": 0.0044, "step": 432 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.029087611607142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 666.1735229492188, "completions/mean_terminated_length": 563.4191284179688, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 4.457142857142857, "grad_norm": 0.15110084414482117, "learning_rate": 7.5e-07, "loss": -0.0132, "num_tokens": 262496183.0, "reward": 0.624093234539032, "reward_std": 0.1760397106409073, "rewards/simpleverify_reward/mean": 0.6240931749343872, "rewards/simpleverify_reward/std": 0.4843730330467224, "step": 433 }, { "clip_ratio/high_max": 0.002092018366965931, "clip_ratio/high_mean": 0.0007937057544040726, "clip_ratio/low_mean": 0.00042644850645956467, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012201542340335436, "epoch": 4.466472303206997, "grad_norm": 0.12051482498645782, "learning_rate": 7.5e-07, "loss": 0.0024, "step": 434 }, { "clip_ratio/high_max": 0.0018760680031846277, "clip_ratio/high_mean": 0.0007819799011485884, "clip_ratio/low_mean": 0.0005930543438807945, "clip_ratio/low_min": 2.484131073288154e-05, "clip_ratio/region_mean": 0.0013750342332059518, "epoch": 4.475801749271137, "grad_norm": 0.1295393854379654, "learning_rate": 7.5e-07, "loss": 0.0044, "step": 435 }, { "clip_ratio/high_max": 0.00200558880897006, "clip_ratio/high_mean": 0.0007762890691083157, "clip_ratio/low_mean": 0.000579943949560402, "clip_ratio/low_min": 1.4004032891534735e-05, "clip_ratio/region_mean": 0.0013562330314016435, "epoch": 4.485131195335277, "grad_norm": 0.1392168253660202, "learning_rate": 7.5e-07, "loss": 0.0052, "step": 436 }, { "clip_ratio/high_max": 0.00203851154219592, "clip_ratio/high_mean": 0.0007721796155237826, "clip_ratio/low_mean": 0.0005361701460060431, "clip_ratio/low_min": 1.8113316400558688e-05, "clip_ratio/region_mean": 0.0013083498051855713, "epoch": 4.494460641399417, "grad_norm": 0.1305140256881714, "learning_rate": 7.5e-07, "loss": -0.016, "step": 437 }, { "clip_ratio/high_max": 0.0018560509488452226, "clip_ratio/high_mean": 0.0008040585853450466, "clip_ratio/low_mean": 0.0005126099549670471, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013166685566829983, "epoch": 4.503790087463557, "grad_norm": 0.15244103968143463, "learning_rate": 7.5e-07, "loss": -0.0672, "step": 438 }, { "clip_ratio/high_max": 0.001924918506119866, "clip_ratio/high_mean": 0.0007973466053954326, "clip_ratio/low_mean": 0.0005127983495185617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013101449694659095, "epoch": 4.513119533527696, "grad_norm": 0.13570654392242432, "learning_rate": 7.5e-07, "loss": -0.018, "step": 439 }, { "clip_ratio/high_max": 0.0020705804708995856, "clip_ratio/high_mean": 0.0008438017139269505, "clip_ratio/low_mean": 0.0004992714875697857, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013430732105916832, "epoch": 4.522448979591837, "grad_norm": 0.2025289535522461, "learning_rate": 7.5e-07, "loss": -0.0154, "step": 440 }, { "clip_ratio/high_max": 0.0018381111040071119, "clip_ratio/high_mean": 0.00075343158277974, "clip_ratio/low_mean": 0.0006664814372925321, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001419913038262166, "epoch": 4.531778425655976, "grad_norm": 0.12944617867469788, "learning_rate": 7.5e-07, "loss": 0.0019, "step": 441 }, { "clip_ratio/high_max": 0.0020622792653739452, "clip_ratio/high_mean": 0.0008789497805992141, "clip_ratio/low_mean": 0.0006590549901375198, "clip_ratio/low_min": 2.4098708308883943e-05, "clip_ratio/region_mean": 0.001538004755275324, "epoch": 4.541107871720117, "grad_norm": 0.1329277604818344, "learning_rate": 7.5e-07, "loss": -0.0184, "step": 442 }, { "clip_ratio/high_max": 0.002270839082484599, "clip_ratio/high_mean": 0.0008707396445970517, "clip_ratio/low_mean": 0.0007951275474624708, "clip_ratio/low_min": 5.692004833690589e-05, "clip_ratio/region_mean": 0.0016658671993354801, "epoch": 4.550437317784256, "grad_norm": 0.1437629610300064, "learning_rate": 7.5e-07, "loss": 0.0222, "step": 443 }, { "clip_ratio/high_max": 0.0020043636177433655, "clip_ratio/high_mean": 0.0007903808582341298, "clip_ratio/low_mean": 0.0006684689587928005, "clip_ratio/low_min": 4.725141479866579e-05, "clip_ratio/region_mean": 0.0014588498379453085, "epoch": 4.559766763848397, "grad_norm": 0.15374557673931122, "learning_rate": 7.5e-07, "loss": -0.0062, "step": 444 }, { "clip_ratio/high_max": 0.002101769670844078, "clip_ratio/high_mean": 0.0008970155122369761, "clip_ratio/low_mean": 0.0007014798411546508, "clip_ratio/low_min": 3.955542797484668e-05, "clip_ratio/region_mean": 0.0015984953934093937, "epoch": 4.569096209912536, "grad_norm": 0.12580399215221405, "learning_rate": 7.5e-07, "loss": -0.0203, "step": 445 }, { "clip_ratio/high_max": 0.0022902291602804326, "clip_ratio/high_mean": 0.0008906019866117276, "clip_ratio/low_mean": 0.0006750494403604534, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001565651393320877, "epoch": 4.578425655976677, "grad_norm": 0.13930457830429077, "learning_rate": 7.5e-07, "loss": -0.0338, "step": 446 }, { "clip_ratio/high_max": 0.0021161023614695296, "clip_ratio/high_mean": 0.0008500066523993155, "clip_ratio/low_mean": 0.0008220592517318437, "clip_ratio/low_min": 4.411019563121954e-05, "clip_ratio/region_mean": 0.0016720659405109473, "epoch": 4.587755102040816, "grad_norm": 0.1388111114501953, "learning_rate": 7.5e-07, "loss": 0.0261, "step": 447 }, { "clip_ratio/high_max": 0.0020530414603854297, "clip_ratio/high_mean": 0.0008583015733165666, "clip_ratio/low_mean": 0.0007821308317943476, "clip_ratio/low_min": 2.9486438506864943e-05, "clip_ratio/region_mean": 0.0016404324014729355, "epoch": 4.597084548104956, "grad_norm": 0.13023537397384644, "learning_rate": 7.5e-07, "loss": 0.0128, "step": 448 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0330636160714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 687.0256958007812, "completions/mean_terminated_length": 570.4585571289062, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 4.606413994169096, "grad_norm": 0.14057570695877075, "learning_rate": 7.5e-07, "loss": -0.0013, "num_tokens": 271857743.0, "reward": 0.5988420844078064, "reward_std": 0.17306843400001526, "rewards/simpleverify_reward/mean": 0.5988420844078064, "rewards/simpleverify_reward/std": 0.49014994502067566, "step": 449 }, { "clip_ratio/high_max": 0.0018205264132120647, "clip_ratio/high_mean": 0.0007523867207055446, "clip_ratio/low_mean": 0.000471224676402926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012236114052939229, "epoch": 4.615743440233236, "grad_norm": 0.13675358891487122, "learning_rate": 7.5e-07, "loss": 0.0093, "step": 450 }, { "clip_ratio/high_max": 0.0018754366537905298, "clip_ratio/high_mean": 0.0006476892303908244, "clip_ratio/low_mean": 0.0006046713751857169, "clip_ratio/low_min": 1.4070238648855593e-05, "clip_ratio/region_mean": 0.0012523605873866472, "epoch": 4.625072886297376, "grad_norm": 0.1445409506559372, "learning_rate": 7.5e-07, "loss": 0.0278, "step": 451 }, { "clip_ratio/high_max": 0.0016874211360118352, "clip_ratio/high_mean": 0.00067334683444642, "clip_ratio/low_mean": 0.0005628094513667747, "clip_ratio/low_min": 1.2413108379405458e-05, "clip_ratio/region_mean": 0.0012361562767182477, "epoch": 4.634402332361516, "grad_norm": 0.1390719711780548, "learning_rate": 7.5e-07, "loss": 0.015, "step": 452 }, { "clip_ratio/high_max": 0.0020800433339900337, "clip_ratio/high_mean": 0.000826436533316155, "clip_ratio/low_mean": 0.0006033772233422496, "clip_ratio/low_min": 3.016388109244872e-05, "clip_ratio/region_mean": 0.0014298137321020477, "epoch": 4.643731778425656, "grad_norm": 0.14265714585781097, "learning_rate": 7.5e-07, "loss": 0.0271, "step": 453 }, { "clip_ratio/high_max": 0.001589495368534699, "clip_ratio/high_mean": 0.0006208046561368974, "clip_ratio/low_mean": 0.0005473463543239632, "clip_ratio/low_min": 1.0074145393446088e-05, "clip_ratio/region_mean": 0.001168151033198228, "epoch": 4.653061224489796, "grad_norm": 0.1595458984375, "learning_rate": 7.5e-07, "loss": 0.0218, "step": 454 }, { "clip_ratio/high_max": 0.0017394983078702353, "clip_ratio/high_mean": 0.0006689027622996946, "clip_ratio/low_mean": 0.0005388010513343033, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012077038518327754, "epoch": 4.662390670553936, "grad_norm": 0.13425207138061523, "learning_rate": 7.5e-07, "loss": -0.0205, "step": 455 }, { "clip_ratio/high_max": 0.0019296595419291407, "clip_ratio/high_mean": 0.0007458570989911095, "clip_ratio/low_mean": 0.00047356349296023836, "clip_ratio/low_min": 4.089784761163173e-05, "clip_ratio/region_mean": 0.0012194205773994327, "epoch": 4.671720116618076, "grad_norm": 0.12792038917541504, "learning_rate": 7.5e-07, "loss": -0.0568, "step": 456 }, { "clip_ratio/high_max": 0.001938334622536786, "clip_ratio/high_mean": 0.000779546697231126, "clip_ratio/low_mean": 0.0006365528843161883, "clip_ratio/low_min": 3.392130383872427e-05, "clip_ratio/region_mean": 0.001416099566995399, "epoch": 4.681049562682215, "grad_norm": 0.1278204321861267, "learning_rate": 7.5e-07, "loss": -0.0095, "step": 457 }, { "clip_ratio/high_max": 0.0015420875315612648, "clip_ratio/high_mean": 0.0006765449506929144, "clip_ratio/low_mean": 0.0006603617730434053, "clip_ratio/low_min": 3.5730796298594214e-05, "clip_ratio/region_mean": 0.001336906756478129, "epoch": 4.690379008746356, "grad_norm": 0.1378299593925476, "learning_rate": 7.5e-07, "loss": 0.0057, "step": 458 }, { "clip_ratio/high_max": 0.0016325050564773846, "clip_ratio/high_mean": 0.0006561511672771303, "clip_ratio/low_mean": 0.0006439916742237983, "clip_ratio/low_min": 4.192942287772894e-05, "clip_ratio/region_mean": 0.0013001428087591194, "epoch": 4.699708454810495, "grad_norm": 0.11765290051698685, "learning_rate": 7.5e-07, "loss": -0.0062, "step": 459 }, { "clip_ratio/high_max": 0.001799173765903106, "clip_ratio/high_mean": 0.000871569141963846, "clip_ratio/low_mean": 0.000657420926472696, "clip_ratio/low_min": 1.3221916560723912e-05, "clip_ratio/region_mean": 0.0015289900329662487, "epoch": 4.709037900874636, "grad_norm": 0.1326141357421875, "learning_rate": 7.5e-07, "loss": -0.0598, "step": 460 }, { "clip_ratio/high_max": 0.001957122716703452, "clip_ratio/high_mean": 0.0008039147014642367, "clip_ratio/low_mean": 0.0006722237412759569, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014761384736630134, "epoch": 4.718367346938775, "grad_norm": 0.11270154267549515, "learning_rate": 7.5e-07, "loss": -0.0108, "step": 461 }, { "clip_ratio/high_max": 0.001853579749877099, "clip_ratio/high_mean": 0.0007001689955359325, "clip_ratio/low_mean": 0.0006873694337627967, "clip_ratio/low_min": 1.30725793496822e-05, "clip_ratio/region_mean": 0.0013875384320272133, "epoch": 4.727696793002916, "grad_norm": 0.14703401923179626, "learning_rate": 7.5e-07, "loss": -0.0023, "step": 462 }, { "clip_ratio/high_max": 0.001959476910997182, "clip_ratio/high_mean": 0.0008098825564957224, "clip_ratio/low_mean": 0.0007665319753868971, "clip_ratio/low_min": 3.5362147173145786e-05, "clip_ratio/region_mean": 0.0015764145355205983, "epoch": 4.737026239067055, "grad_norm": 0.13609673082828522, "learning_rate": 7.5e-07, "loss": -0.0092, "step": 463 }, { "clip_ratio/high_max": 0.002183849079301581, "clip_ratio/high_mean": 0.0008515362369507784, "clip_ratio/low_mean": 0.0008212688844650984, "clip_ratio/low_min": 8.229960349126486e-05, "clip_ratio/region_mean": 0.0016728051195968874, "epoch": 4.746355685131196, "grad_norm": 0.14858703315258026, "learning_rate": 7.5e-07, "loss": -0.0212, "step": 464 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0369698660714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3945.0, "completions/mean_length": 699.9819946289062, "completions/mean_terminated_length": 569.61181640625, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 4.755685131195335, "grad_norm": 0.14448174834251404, "learning_rate": 7.5e-07, "loss": 0.0112, "num_tokens": 281159812.0, "reward": 0.6095145344734192, "reward_std": 0.1710619181394577, "rewards/simpleverify_reward/mean": 0.6095145344734192, "rewards/simpleverify_reward/std": 0.48787620663642883, "step": 465 }, { "clip_ratio/high_max": 0.0016467587483930402, "clip_ratio/high_mean": 0.0006661154939138214, "clip_ratio/low_mean": 0.0005465937956614653, "clip_ratio/low_min": 2.8337305593595374e-05, "clip_ratio/region_mean": 0.0012127092850278132, "epoch": 4.765014577259475, "grad_norm": 0.1312132626771927, "learning_rate": 7.5e-07, "loss": -0.0069, "step": 466 }, { "clip_ratio/high_max": 0.0018264318750880193, "clip_ratio/high_mean": 0.0007701601025473792, "clip_ratio/low_mean": 0.0005221373380663863, "clip_ratio/low_min": 1.295605306950165e-05, "clip_ratio/region_mean": 0.001292297452891944, "epoch": 4.774344023323615, "grad_norm": 0.13193680346012115, "learning_rate": 7.5e-07, "loss": -0.0177, "step": 467 }, { "clip_ratio/high_max": 0.0016870855106390081, "clip_ratio/high_mean": 0.0007033165620669024, "clip_ratio/low_mean": 0.0005004865151931881, "clip_ratio/low_min": 6.015655344526749e-05, "clip_ratio/region_mean": 0.0012038030799885746, "epoch": 4.783673469387755, "grad_norm": 0.12467977404594421, "learning_rate": 7.5e-07, "loss": 0.0136, "step": 468 }, { "clip_ratio/high_max": 0.0020178991435386706, "clip_ratio/high_mean": 0.000797357271949295, "clip_ratio/low_mean": 0.0004983714525224059, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001295728754485026, "epoch": 4.793002915451895, "grad_norm": 0.1410754770040512, "learning_rate": 7.5e-07, "loss": -0.0175, "step": 469 }, { "clip_ratio/high_max": 0.0017002989843604155, "clip_ratio/high_mean": 0.0007254277406900655, "clip_ratio/low_mean": 0.0006409582656488055, "clip_ratio/low_min": 3.170935269736219e-05, "clip_ratio/region_mean": 0.0013663859863299876, "epoch": 4.802332361516035, "grad_norm": 0.14712795615196228, "learning_rate": 7.5e-07, "loss": 0.0091, "step": 470 }, { "clip_ratio/high_max": 0.0019389020671951585, "clip_ratio/high_mean": 0.0007913356912467862, "clip_ratio/low_mean": 0.0006470123662438709, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001438348066585604, "epoch": 4.811661807580175, "grad_norm": 0.13746172189712524, "learning_rate": 7.5e-07, "loss": 0.0059, "step": 471 }, { "clip_ratio/high_max": 0.0020308634993853047, "clip_ratio/high_mean": 0.0007768980995024322, "clip_ratio/low_mean": 0.0006064226508897264, "clip_ratio/low_min": 1.1954858564422466e-05, "clip_ratio/region_mean": 0.001383320730383275, "epoch": 4.820991253644315, "grad_norm": 0.13030557334423065, "learning_rate": 7.5e-07, "loss": -0.007, "step": 472 }, { "clip_ratio/high_max": 0.0019477485038805753, "clip_ratio/high_mean": 0.0007755901151540456, "clip_ratio/low_mean": 0.0007045828078844352, "clip_ratio/low_min": 1.9379844161449e-05, "clip_ratio/region_mean": 0.0014801729157625232, "epoch": 4.830320699708455, "grad_norm": 0.14699174463748932, "learning_rate": 7.5e-07, "loss": 0.0189, "step": 473 }, { "clip_ratio/high_max": 0.0021290155564201996, "clip_ratio/high_mean": 0.0008882524052751251, "clip_ratio/low_mean": 0.000617683641394251, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015059360703162383, "epoch": 4.839650145772595, "grad_norm": 0.127415269613266, "learning_rate": 7.5e-07, "loss": -0.0562, "step": 474 }, { "clip_ratio/high_max": 0.0021199159091338515, "clip_ratio/high_mean": 0.0007547224686277332, "clip_ratio/low_mean": 0.0007355914085565018, "clip_ratio/low_min": 3.9451648262911476e-05, "clip_ratio/region_mean": 0.0014903138799127191, "epoch": 4.848979591836734, "grad_norm": 0.1357252448797226, "learning_rate": 7.5e-07, "loss": -0.0052, "step": 475 }, { "clip_ratio/high_max": 0.0018016316498687956, "clip_ratio/high_mean": 0.0007292530990525847, "clip_ratio/low_mean": 0.0007141724108805647, "clip_ratio/low_min": 1.642143979552202e-05, "clip_ratio/region_mean": 0.0014434255317610223, "epoch": 4.858309037900875, "grad_norm": 0.1302269697189331, "learning_rate": 7.5e-07, "loss": 0.0243, "step": 476 }, { "clip_ratio/high_max": 0.002279565840581199, "clip_ratio/high_mean": 0.0008358134855370736, "clip_ratio/low_mean": 0.0005738252684750478, "clip_ratio/low_min": 3.486750210868195e-05, "clip_ratio/region_mean": 0.0014096387749304995, "epoch": 4.867638483965014, "grad_norm": 0.14299818873405457, "learning_rate": 7.5e-07, "loss": -0.0154, "step": 477 }, { "clip_ratio/high_max": 0.0019073536386713386, "clip_ratio/high_mean": 0.0007936125784908654, "clip_ratio/low_mean": 0.000838951074911165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001632563660677988, "epoch": 4.876967930029155, "grad_norm": 0.16214604675769806, "learning_rate": 7.5e-07, "loss": 0.0024, "step": 478 }, { "clip_ratio/high_max": 0.001847487350460142, "clip_ratio/high_mean": 0.0007892060621088604, "clip_ratio/low_mean": 0.0007202045508165611, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015094106056494638, "epoch": 4.886297376093294, "grad_norm": 0.13754135370254517, "learning_rate": 7.5e-07, "loss": -0.0287, "step": 479 }, { "clip_ratio/high_max": 0.0019255081278970465, "clip_ratio/high_mean": 0.0008177333347703097, "clip_ratio/low_mean": 0.0007129543428163743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001530687695776578, "epoch": 4.895626822157435, "grad_norm": 0.1294672042131424, "learning_rate": 7.5e-07, "loss": -0.029, "step": 480 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0362723214285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 698.5775756835938, "completions/mean_terminated_length": 570.70703125, "completions/min_length": 105.0, "completions/min_terminated_length": 105.0, "epoch": 5.0093294460641395, "grad_norm": 0.13860493898391724, "learning_rate": 7.5e-07, "loss": 0.018, "num_tokens": 290449300.0, "reward": 0.6146066188812256, "reward_std": 0.1648068130016327, "rewards/simpleverify_reward/mean": 0.6146065592765808, "rewards/simpleverify_reward/std": 0.48670509457588196, "step": 481 }, { "clip_ratio/high_max": 0.0018470853065082338, "clip_ratio/high_mean": 0.0006507383777716313, "clip_ratio/low_mean": 0.0004437268273704831, "clip_ratio/low_min": 3.0424246688198764e-05, "clip_ratio/region_mean": 0.0010944652240141295, "epoch": 5.01865889212828, "grad_norm": 0.1350337713956833, "learning_rate": 7.5e-07, "loss": -0.0122, "step": 482 }, { "clip_ratio/high_max": 0.0018503527389839292, "clip_ratio/high_mean": 0.0007472679935744964, "clip_ratio/low_mean": 0.000513331573074538, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012605995652847923, "epoch": 5.0279883381924195, "grad_norm": 0.1302531659603119, "learning_rate": 7.5e-07, "loss": -0.0106, "step": 483 }, { "clip_ratio/high_max": 0.0018302307653357275, "clip_ratio/high_mean": 0.0007430651130562183, "clip_ratio/low_mean": 0.0005147268566361163, "clip_ratio/low_min": 1.6015374058042653e-05, "clip_ratio/region_mean": 0.0012577919842442498, "epoch": 5.03731778425656, "grad_norm": 0.16438889503479004, "learning_rate": 7.5e-07, "loss": -0.0365, "step": 484 }, { "clip_ratio/high_max": 0.0017351472761220066, "clip_ratio/high_mean": 0.0006790565262235759, "clip_ratio/low_mean": 0.0006323010584310396, "clip_ratio/low_min": 4.043628359795548e-05, "clip_ratio/region_mean": 0.001311357569647953, "epoch": 5.0466472303206995, "grad_norm": 0.14908896386623383, "learning_rate": 7.5e-07, "loss": -0.0145, "step": 485 }, { "clip_ratio/high_max": 0.001689907192485407, "clip_ratio/high_mean": 0.0006700305757476599, "clip_ratio/low_mean": 0.0004893963277936564, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001159426910817274, "epoch": 5.05597667638484, "grad_norm": 0.12821170687675476, "learning_rate": 7.5e-07, "loss": -0.0267, "step": 486 }, { "clip_ratio/high_max": 0.0019397663709241897, "clip_ratio/high_mean": 0.0007544454219896579, "clip_ratio/low_mean": 0.0006129846497060498, "clip_ratio/low_min": 6.231474344531307e-05, "clip_ratio/region_mean": 0.001367430068057729, "epoch": 5.0653061224489795, "grad_norm": 0.1319240927696228, "learning_rate": 7.5e-07, "loss": -0.0069, "step": 487 }, { "clip_ratio/high_max": 0.001888791513920296, "clip_ratio/high_mean": 0.0006828327859693673, "clip_ratio/low_mean": 0.0005772338972747093, "clip_ratio/low_min": 3.4492273698560894e-05, "clip_ratio/region_mean": 0.0012600666741491295, "epoch": 5.07463556851312, "grad_norm": 0.1321650743484497, "learning_rate": 7.5e-07, "loss": 0.0229, "step": 488 }, { "clip_ratio/high_max": 0.0017603277447051369, "clip_ratio/high_mean": 0.0006685683474643156, "clip_ratio/low_mean": 0.0006042987715773052, "clip_ratio/low_min": 6.880132605147082e-05, "clip_ratio/region_mean": 0.0012728671172226314, "epoch": 5.0839650145772595, "grad_norm": 0.14090333878993988, "learning_rate": 7.5e-07, "loss": -0.0308, "step": 489 }, { "clip_ratio/high_max": 0.001809050747397123, "clip_ratio/high_mean": 0.0007495743357139872, "clip_ratio/low_mean": 0.0006797176574764308, "clip_ratio/low_min": 1.5180957234406378e-05, "clip_ratio/region_mean": 0.0014292920168372802, "epoch": 5.093294460641399, "grad_norm": 0.15775853395462036, "learning_rate": 7.5e-07, "loss": 0.002, "step": 490 }, { "clip_ratio/high_max": 0.0016178615478565916, "clip_ratio/high_mean": 0.0006446157804020913, "clip_ratio/low_mean": 0.0007066846019370132, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013513003978005145, "epoch": 5.1026239067055394, "grad_norm": 0.11786099523305893, "learning_rate": 7.5e-07, "loss": 0.0245, "step": 491 }, { "clip_ratio/high_max": 0.0017088571948988829, "clip_ratio/high_mean": 0.0007903857513156254, "clip_ratio/low_mean": 0.0006034661514604522, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013938519077782985, "epoch": 5.111953352769679, "grad_norm": 0.1316787302494049, "learning_rate": 7.5e-07, "loss": -0.0324, "step": 492 }, { "clip_ratio/high_max": 0.001992966699617682, "clip_ratio/high_mean": 0.0008306409381475532, "clip_ratio/low_mean": 0.0007620348524142173, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015926758205750957, "epoch": 5.121282798833819, "grad_norm": 0.13594411313533783, "learning_rate": 7.5e-07, "loss": -0.0093, "step": 493 }, { "clip_ratio/high_max": 0.0020271833491278812, "clip_ratio/high_mean": 0.0007788471211824799, "clip_ratio/low_mean": 0.0007599579257657751, "clip_ratio/low_min": 5.1299724873388186e-05, "clip_ratio/region_mean": 0.001538805037853308, "epoch": 5.130612244897959, "grad_norm": 0.14177857339382172, "learning_rate": 7.5e-07, "loss": 0.0097, "step": 494 }, { "clip_ratio/high_max": 0.00210764732764801, "clip_ratio/high_mean": 0.0008632083354314091, "clip_ratio/low_mean": 0.0007549719684902811, "clip_ratio/low_min": 3.1141007639234886e-05, "clip_ratio/region_mean": 0.0016181803330255207, "epoch": 5.139941690962099, "grad_norm": 0.13621920347213745, "learning_rate": 7.5e-07, "loss": 0.0222, "step": 495 }, { "clip_ratio/high_max": 0.001890771422040416, "clip_ratio/high_mean": 0.000802141224994557, "clip_ratio/low_mean": 0.0006754257728971425, "clip_ratio/low_min": 1.641281596675981e-05, "clip_ratio/region_mean": 0.0014775669988011941, "epoch": 5.149271137026239, "grad_norm": 0.13980422914028168, "learning_rate": 7.5e-07, "loss": -0.0246, "step": 496 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0390625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4090.0, "completions/mean_length": 721.9131469726562, "completions/mean_terminated_length": 584.7550659179688, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 5.158600583090379, "grad_norm": 0.14978386461734772, "learning_rate": 7.5e-07, "loss": -0.0093, "num_tokens": 299926094.0, "reward": 0.6122349500656128, "reward_std": 0.1780865341424942, "rewards/simpleverify_reward/mean": 0.6122349500656128, "rewards/simpleverify_reward/std": 0.4872575104236603, "step": 497 }, { "clip_ratio/high_max": 0.0017732913984218612, "clip_ratio/high_mean": 0.0007286600975930924, "clip_ratio/low_mean": 0.0005616878479486331, "clip_ratio/low_min": 9.912767382047605e-06, "clip_ratio/region_mean": 0.0012903479400847573, "epoch": 5.167930029154519, "grad_norm": 0.14300790429115295, "learning_rate": 7.5e-07, "loss": 0.0166, "step": 498 }, { "clip_ratio/high_max": 0.002071462691674242, "clip_ratio/high_mean": 0.0009060933152795769, "clip_ratio/low_mean": 0.00045692593994317576, "clip_ratio/low_min": 1.1743705726985354e-05, "clip_ratio/region_mean": 0.0013630192152049858, "epoch": 5.1772594752186585, "grad_norm": 0.15501728653907776, "learning_rate": 7.5e-07, "loss": -0.0518, "step": 499 }, { "clip_ratio/high_max": 0.0018789325695252046, "clip_ratio/high_mean": 0.0007508348317060154, "clip_ratio/low_mean": 0.0005722620117012411, "clip_ratio/low_min": 3.009078591276193e-05, "clip_ratio/region_mean": 0.001323096854321193, "epoch": 5.186588921282799, "grad_norm": 0.13534313440322876, "learning_rate": 7.5e-07, "loss": 0.0132, "step": 500 }, { "clip_ratio/high_max": 0.001691886154731037, "clip_ratio/high_mean": 0.0006348859860736411, "clip_ratio/low_mean": 0.00046001946338947164, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010949054558295757, "epoch": 5.1959183673469385, "grad_norm": 0.13750749826431274, "learning_rate": 7.5e-07, "loss": -0.0081, "step": 501 }, { "clip_ratio/high_max": 0.001851037497544894, "clip_ratio/high_mean": 0.0008021013272809796, "clip_ratio/low_mean": 0.000502673859045899, "clip_ratio/low_min": 2.6957084628520533e-05, "clip_ratio/region_mean": 0.001304775181779405, "epoch": 5.205247813411079, "grad_norm": 0.12921108305454254, "learning_rate": 7.5e-07, "loss": -0.0147, "step": 502 }, { "clip_ratio/high_max": 0.0020149149277131073, "clip_ratio/high_mean": 0.0008190559929062147, "clip_ratio/low_mean": 0.0006546231652464485, "clip_ratio/low_min": 2.726593447732739e-05, "clip_ratio/region_mean": 0.0014736791599716526, "epoch": 5.214577259475218, "grad_norm": 0.1435370296239853, "learning_rate": 7.5e-07, "loss": 0.0172, "step": 503 }, { "clip_ratio/high_max": 0.0018436500322422944, "clip_ratio/high_mean": 0.0007328987594519276, "clip_ratio/low_mean": 0.0005209874975662387, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012538862574729137, "epoch": 5.223906705539359, "grad_norm": 0.13871459662914276, "learning_rate": 7.5e-07, "loss": -0.0082, "step": 504 }, { "clip_ratio/high_max": 0.0019040549668716267, "clip_ratio/high_mean": 0.000775174041336868, "clip_ratio/low_mean": 0.0006762303000868997, "clip_ratio/low_min": 3.594465033529559e-05, "clip_ratio/region_mean": 0.0014514043214148842, "epoch": 5.233236151603498, "grad_norm": 0.13658717274665833, "learning_rate": 7.5e-07, "loss": -0.0154, "step": 505 }, { "clip_ratio/high_max": 0.0019625853674369864, "clip_ratio/high_mean": 0.0008413827472395496, "clip_ratio/low_mean": 0.0006234653883439023, "clip_ratio/low_min": 2.4206041416618973e-05, "clip_ratio/region_mean": 0.0014648481374024414, "epoch": 5.242565597667639, "grad_norm": 0.15126526355743408, "learning_rate": 7.5e-07, "loss": -0.0282, "step": 506 }, { "clip_ratio/high_max": 0.001652775357797509, "clip_ratio/high_mean": 0.0006946515686649946, "clip_ratio/low_mean": 0.0008080532716121525, "clip_ratio/low_min": 1.7720441974233836e-05, "clip_ratio/region_mean": 0.0015027048430056311, "epoch": 5.251895043731778, "grad_norm": 0.13680978119373322, "learning_rate": 7.5e-07, "loss": 0.0028, "step": 507 }, { "clip_ratio/high_max": 0.0019440902215137612, "clip_ratio/high_mean": 0.0006651036401308374, "clip_ratio/low_mean": 0.0007388744616037002, "clip_ratio/low_min": 1.333191084995633e-05, "clip_ratio/region_mean": 0.00140397809445858, "epoch": 5.261224489795918, "grad_norm": 0.13925637304782867, "learning_rate": 7.5e-07, "loss": 0.0339, "step": 508 }, { "clip_ratio/high_max": 0.0020570592023432255, "clip_ratio/high_mean": 0.0008233747284975834, "clip_ratio/low_mean": 0.0006831100326962769, "clip_ratio/low_min": 3.248440771130845e-05, "clip_ratio/region_mean": 0.0015064847648318391, "epoch": 5.270553935860058, "grad_norm": 0.13757140934467316, "learning_rate": 7.5e-07, "loss": -0.0084, "step": 509 }, { "clip_ratio/high_max": 0.0019868631788995117, "clip_ratio/high_mean": 0.000854424173667212, "clip_ratio/low_mean": 0.0007047257367958082, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015591499359288719, "epoch": 5.279883381924198, "grad_norm": 0.13248638808727264, "learning_rate": 7.5e-07, "loss": -0.0159, "step": 510 }, { "clip_ratio/high_max": 0.002148589635908138, "clip_ratio/high_mean": 0.0009134960055234842, "clip_ratio/low_mean": 0.0008280662004835904, "clip_ratio/low_min": 6.289572957030032e-05, "clip_ratio/region_mean": 0.001741562176903244, "epoch": 5.289212827988338, "grad_norm": 0.13679827749729156, "learning_rate": 7.5e-07, "loss": -0.0395, "step": 511 }, { "clip_ratio/high_max": 0.0020884950390609447, "clip_ratio/high_mean": 0.0008088386712188367, "clip_ratio/low_mean": 0.0007138985947676701, "clip_ratio/low_min": 3.905918038071832e-05, "clip_ratio/region_mean": 0.0015227372823574115, "epoch": 5.298542274052478, "grad_norm": 0.1270134150981903, "learning_rate": 7.5e-07, "loss": -0.0177, "step": 512 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.037318638392857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4096.0, "completions/mean_length": 708.6611938476562, "completions/mean_terminated_length": 577.3499145507812, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 5.307871720116618, "grad_norm": 0.14489367604255676, "learning_rate": 7.5e-07, "loss": -0.022, "num_tokens": 309319764.0, "reward": 0.6239537000656128, "reward_std": 0.16030703485012054, "rewards/simpleverify_reward/mean": 0.6239537000656128, "rewards/simpleverify_reward/std": 0.48440876603126526, "step": 513 }, { "clip_ratio/high_max": 0.0020580012460413855, "clip_ratio/high_mean": 0.0008198959121727967, "clip_ratio/low_mean": 0.0004230168397043599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012429127418727148, "epoch": 5.317201166180758, "grad_norm": 0.13636641204357147, "learning_rate": 7.5e-07, "loss": -0.016, "step": 514 }, { "clip_ratio/high_max": 0.0017953861788555514, "clip_ratio/high_mean": 0.0007348612380155828, "clip_ratio/low_mean": 0.0004957384899171302, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012305997443036176, "epoch": 5.326530612244898, "grad_norm": 0.13111677765846252, "learning_rate": 7.5e-07, "loss": -0.0109, "step": 515 }, { "clip_ratio/high_max": 0.0019476407251204364, "clip_ratio/high_mean": 0.0007752077053737594, "clip_ratio/low_mean": 0.00052284792309365, "clip_ratio/low_min": 4.042515047331108e-05, "clip_ratio/region_mean": 0.00129805564210983, "epoch": 5.335860058309038, "grad_norm": 0.14180073142051697, "learning_rate": 7.5e-07, "loss": -0.012, "step": 516 }, { "clip_ratio/high_max": 0.0018302748394489754, "clip_ratio/high_mean": 0.0007069599387250491, "clip_ratio/low_mean": 0.0006419657674996415, "clip_ratio/low_min": 1.666222306084819e-05, "clip_ratio/region_mean": 0.0013489256816683337, "epoch": 5.345189504373177, "grad_norm": 0.12370339781045914, "learning_rate": 7.5e-07, "loss": -0.0033, "step": 517 }, { "clip_ratio/high_max": 0.0019068303736275993, "clip_ratio/high_mean": 0.0007445696919603506, "clip_ratio/low_mean": 0.000540947688932647, "clip_ratio/low_min": 1.8729397197603248e-05, "clip_ratio/region_mean": 0.0012855173772550188, "epoch": 5.354518950437318, "grad_norm": 0.12796400487422943, "learning_rate": 7.5e-07, "loss": -0.0277, "step": 518 }, { "clip_ratio/high_max": 0.0015896897384664044, "clip_ratio/high_mean": 0.0006529776019306155, "clip_ratio/low_mean": 0.0006486144666268956, "clip_ratio/low_min": 1.4285714314610232e-05, "clip_ratio/region_mean": 0.001301592081290437, "epoch": 5.363848396501457, "grad_norm": 0.13864344358444214, "learning_rate": 7.5e-07, "loss": -0.0073, "step": 519 }, { "clip_ratio/high_max": 0.0021342619584174827, "clip_ratio/high_mean": 0.0006944915157873766, "clip_ratio/low_mean": 0.0006754996365998522, "clip_ratio/low_min": 5.1865721616195515e-05, "clip_ratio/region_mean": 0.001369991157844197, "epoch": 5.373177842565598, "grad_norm": 0.1330270916223526, "learning_rate": 7.5e-07, "loss": 0.0343, "step": 520 }, { "clip_ratio/high_max": 0.0012837603462685365, "clip_ratio/high_mean": 0.000513504948685295, "clip_ratio/low_mean": 0.0005597388699243311, "clip_ratio/low_min": 1.097839412977919e-05, "clip_ratio/region_mean": 0.0010732438022387214, "epoch": 5.382507288629737, "grad_norm": 0.13281384110450745, "learning_rate": 7.5e-07, "loss": 0.0294, "step": 521 }, { "clip_ratio/high_max": 0.0016677099993103184, "clip_ratio/high_mean": 0.00062616194554721, "clip_ratio/low_mean": 0.0007428811204590602, "clip_ratio/low_min": 4.4873424485558644e-05, "clip_ratio/region_mean": 0.0013690430641872808, "epoch": 5.391836734693878, "grad_norm": 0.13574445247650146, "learning_rate": 7.5e-07, "loss": 0.033, "step": 522 }, { "clip_ratio/high_max": 0.0021666248685505707, "clip_ratio/high_mean": 0.0007631511689396575, "clip_ratio/low_mean": 0.0006400669972208561, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001403218186169397, "epoch": 5.401166180758017, "grad_norm": 0.13975489139556885, "learning_rate": 7.5e-07, "loss": -0.0441, "step": 523 }, { "clip_ratio/high_max": 0.001733349599817302, "clip_ratio/high_mean": 0.0006758810486644506, "clip_ratio/low_mean": 0.0006825587161074509, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013584397747763433, "epoch": 5.410495626822158, "grad_norm": 0.13072234392166138, "learning_rate": 7.5e-07, "loss": -0.021, "step": 524 }, { "clip_ratio/high_max": 0.0016863440432643984, "clip_ratio/high_mean": 0.0006644273094025266, "clip_ratio/low_mean": 0.0006996867268753704, "clip_ratio/low_min": 1.745810004649684e-05, "clip_ratio/region_mean": 0.0013641140576510224, "epoch": 5.419825072886297, "grad_norm": 0.14115004241466522, "learning_rate": 7.5e-07, "loss": 0.0153, "step": 525 }, { "clip_ratio/high_max": 0.0022102410657680593, "clip_ratio/high_mean": 0.0008669294275023276, "clip_ratio/low_mean": 0.0005742263438150985, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014411557822313625, "epoch": 5.429154518950437, "grad_norm": 0.11857084184885025, "learning_rate": 7.5e-07, "loss": -0.0272, "step": 526 }, { "clip_ratio/high_max": 0.001895297387818573, "clip_ratio/high_mean": 0.0007559829828096554, "clip_ratio/low_mean": 0.0008050948035815964, "clip_ratio/low_min": 5.155710277904291e-05, "clip_ratio/region_mean": 0.0015610778173140716, "epoch": 5.438483965014577, "grad_norm": 0.15165205299854279, "learning_rate": 7.5e-07, "loss": 0.0192, "step": 527 }, { "clip_ratio/high_max": 0.0021283276946633123, "clip_ratio/high_mean": 0.0007894895352364983, "clip_ratio/low_mean": 0.0006395249602064723, "clip_ratio/low_min": 1.750209958117921e-05, "clip_ratio/region_mean": 0.00142901449726196, "epoch": 5.447813411078717, "grad_norm": 0.14030307531356812, "learning_rate": 7.5e-07, "loss": 0.0173, "step": 528 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0448521205357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 736.5260009765625, "completions/mean_terminated_length": 578.770751953125, "completions/min_length": 83.0, "completions/min_terminated_length": 83.0, "epoch": 5.457142857142857, "grad_norm": 0.15601588785648346, "learning_rate": 7.5e-07, "loss": -0.0309, "num_tokens": 318694616.0, "reward": 0.6028181314468384, "reward_std": 0.1752912700176239, "rewards/simpleverify_reward/mean": 0.6028180718421936, "rewards/simpleverify_reward/std": 0.48933133482933044, "step": 529 }, { "clip_ratio/high_max": 0.0020664685871452093, "clip_ratio/high_mean": 0.0008543217882106546, "clip_ratio/low_mean": 0.000630089672995382, "clip_ratio/low_min": 1.1255177014390938e-05, "clip_ratio/region_mean": 0.0014844114339211956, "epoch": 5.466472303206997, "grad_norm": 0.13175208866596222, "learning_rate": 7.5e-07, "loss": -0.0289, "step": 530 }, { "clip_ratio/high_max": 0.00206903421349125, "clip_ratio/high_mean": 0.0007484028483304428, "clip_ratio/low_mean": 0.0005665749004037934, "clip_ratio/low_min": 3.8368951209122315e-05, "clip_ratio/region_mean": 0.001314977736910805, "epoch": 5.475801749271137, "grad_norm": 0.14648321270942688, "learning_rate": 7.5e-07, "loss": 0.0163, "step": 531 }, { "clip_ratio/high_max": 0.0019385957857593894, "clip_ratio/high_mean": 0.000738438408006914, "clip_ratio/low_mean": 0.0005601047905656742, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012985431967535987, "epoch": 5.485131195335277, "grad_norm": 0.13005149364471436, "learning_rate": 7.5e-07, "loss": -0.0339, "step": 532 }, { "clip_ratio/high_max": 0.0018583045748528093, "clip_ratio/high_mean": 0.0007093772328516934, "clip_ratio/low_mean": 0.0005678227516909828, "clip_ratio/low_min": 4.0351405914407223e-05, "clip_ratio/region_mean": 0.0012771999863616657, "epoch": 5.494460641399417, "grad_norm": 0.142225980758667, "learning_rate": 7.5e-07, "loss": 0.0128, "step": 533 }, { "clip_ratio/high_max": 0.002043795171630336, "clip_ratio/high_mean": 0.000768740157582215, "clip_ratio/low_mean": 0.0006217967020347714, "clip_ratio/low_min": 1.7016063793562353e-05, "clip_ratio/region_mean": 0.001390536875987891, "epoch": 5.503790087463557, "grad_norm": 0.13709872961044312, "learning_rate": 7.5e-07, "loss": -0.0149, "step": 534 }, { "clip_ratio/high_max": 0.0019343629319337197, "clip_ratio/high_mean": 0.0007862950624257792, "clip_ratio/low_mean": 0.0005848059272466344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001371101003314834, "epoch": 5.513119533527696, "grad_norm": 0.14975519478321075, "learning_rate": 7.5e-07, "loss": 0.0095, "step": 535 }, { "clip_ratio/high_max": 0.0019426629660301842, "clip_ratio/high_mean": 0.000733998009309289, "clip_ratio/low_mean": 0.0005530406569960178, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012870386635768227, "epoch": 5.522448979591837, "grad_norm": 0.1275554597377777, "learning_rate": 7.5e-07, "loss": -0.0117, "step": 536 }, { "clip_ratio/high_max": 0.002399505530775059, "clip_ratio/high_mean": 0.0008882091424311511, "clip_ratio/low_mean": 0.0006821070119258366, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015703161407145672, "epoch": 5.531778425655976, "grad_norm": 0.1552540361881256, "learning_rate": 7.5e-07, "loss": 0.0067, "step": 537 }, { "clip_ratio/high_max": 0.0020409221833688207, "clip_ratio/high_mean": 0.0008051876811805414, "clip_ratio/low_mean": 0.0006024402609909885, "clip_ratio/low_min": 3.385370109754149e-05, "clip_ratio/region_mean": 0.0014076279512664769, "epoch": 5.541107871720117, "grad_norm": 0.14855311810970306, "learning_rate": 7.5e-07, "loss": -0.0291, "step": 538 }, { "clip_ratio/high_max": 0.0019380232552066445, "clip_ratio/high_mean": 0.0008848120269249193, "clip_ratio/low_mean": 0.0006793721822759835, "clip_ratio/low_min": 1.3001872503082268e-05, "clip_ratio/region_mean": 0.0015641841891920194, "epoch": 5.550437317784256, "grad_norm": 0.13955125212669373, "learning_rate": 7.5e-07, "loss": -0.0622, "step": 539 }, { "clip_ratio/high_max": 0.0019953803857788444, "clip_ratio/high_mean": 0.0007622739367434406, "clip_ratio/low_mean": 0.0006504954408228514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014127694012131542, "epoch": 5.559766763848397, "grad_norm": 0.13137786090373993, "learning_rate": 7.5e-07, "loss": 0.0079, "step": 540 }, { "clip_ratio/high_max": 0.0015172537459875457, "clip_ratio/high_mean": 0.0006522807252622442, "clip_ratio/low_mean": 0.0007162897700254689, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013685704761883244, "epoch": 5.569096209912536, "grad_norm": 0.14050988852977753, "learning_rate": 7.5e-07, "loss": 0.0235, "step": 541 }, { "clip_ratio/high_max": 0.0023753159475745633, "clip_ratio/high_mean": 0.0009099433409573976, "clip_ratio/low_mean": 0.0007758950787319918, "clip_ratio/low_min": 7.499362254748121e-05, "clip_ratio/region_mean": 0.0016858384406077676, "epoch": 5.578425655976677, "grad_norm": 0.1585514098405838, "learning_rate": 7.5e-07, "loss": 0.0021, "step": 542 }, { "clip_ratio/high_max": 0.001973818209080491, "clip_ratio/high_mean": 0.0008450071654806379, "clip_ratio/low_mean": 0.000658911798382178, "clip_ratio/low_min": 4.0454863665218e-05, "clip_ratio/region_mean": 0.001503918978414731, "epoch": 5.587755102040816, "grad_norm": 0.12890534102916718, "learning_rate": 7.5e-07, "loss": -0.023, "step": 543 }, { "clip_ratio/high_max": 0.0018228923945571296, "clip_ratio/high_mean": 0.0007538139179814607, "clip_ratio/low_mean": 0.0006461421762651298, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001399956105160527, "epoch": 5.597084548104956, "grad_norm": 0.12542645633220673, "learning_rate": 7.5e-07, "loss": -0.0394, "step": 544 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0372488839285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4093.0, "completions/mean_length": 707.4022216796875, "completions/mean_terminated_length": 576.2971801757812, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 5.606413994169096, "grad_norm": 0.1620146781206131, "learning_rate": 7.5e-07, "loss": 0.0216, "num_tokens": 328090574.0, "reward": 0.6291853189468384, "reward_std": 0.18085815012454987, "rewards/simpleverify_reward/mean": 0.6291852593421936, "rewards/simpleverify_reward/std": 0.4830397963523865, "step": 545 }, { "clip_ratio/high_max": 0.0021733193098043557, "clip_ratio/high_mean": 0.0008254812128143385, "clip_ratio/low_mean": 0.0006559187177117565, "clip_ratio/low_min": 9.818091621127678e-05, "clip_ratio/region_mean": 0.0014813999514444731, "epoch": 5.615743440233236, "grad_norm": 0.27969613671302795, "learning_rate": 7.5e-07, "loss": 0.0167, "step": 546 }, { "clip_ratio/high_max": 0.0017518860840937123, "clip_ratio/high_mean": 0.0007219312828965485, "clip_ratio/low_mean": 0.0006072914711694466, "clip_ratio/low_min": 3.858227046293905e-05, "clip_ratio/region_mean": 0.001329222766798921, "epoch": 5.625072886297376, "grad_norm": 0.1317666918039322, "learning_rate": 7.5e-07, "loss": -0.0174, "step": 547 }, { "clip_ratio/high_max": 0.0018082113529089838, "clip_ratio/high_mean": 0.0007290618641491164, "clip_ratio/low_mean": 0.0005743412657466251, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013034031180723105, "epoch": 5.634402332361516, "grad_norm": 0.145670086145401, "learning_rate": 7.5e-07, "loss": -0.0036, "step": 548 }, { "clip_ratio/high_max": 0.001906351993966382, "clip_ratio/high_mean": 0.0007980836289789295, "clip_ratio/low_mean": 0.0006206918815223617, "clip_ratio/low_min": 1.6910173144424334e-05, "clip_ratio/region_mean": 0.0014187754859449342, "epoch": 5.643731778425656, "grad_norm": 0.15721869468688965, "learning_rate": 7.5e-07, "loss": -0.0057, "step": 549 }, { "clip_ratio/high_max": 0.0023759028845233843, "clip_ratio/high_mean": 0.001017322745610727, "clip_ratio/low_mean": 0.0006012233643559739, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016185460845008492, "epoch": 5.653061224489796, "grad_norm": 0.1564369648694992, "learning_rate": 7.5e-07, "loss": -0.0929, "step": 550 }, { "clip_ratio/high_max": 0.002130025124642998, "clip_ratio/high_mean": 0.0008188908996089594, "clip_ratio/low_mean": 0.0007361940297414549, "clip_ratio/low_min": 9.321341258328175e-05, "clip_ratio/region_mean": 0.001555084905703552, "epoch": 5.662390670553936, "grad_norm": 0.14726947247982025, "learning_rate": 7.5e-07, "loss": 0.0035, "step": 551 }, { "clip_ratio/high_max": 0.0020327612801338546, "clip_ratio/high_mean": 0.0008112550240184646, "clip_ratio/low_mean": 0.0006377961617545225, "clip_ratio/low_min": 1.4089269825490192e-05, "clip_ratio/region_mean": 0.001449051185772987, "epoch": 5.671720116618076, "grad_norm": 0.1425078958272934, "learning_rate": 7.5e-07, "loss": -0.0009, "step": 552 }, { "clip_ratio/high_max": 0.002091341608320363, "clip_ratio/high_mean": 0.0008001693749974947, "clip_ratio/low_mean": 0.0007362401147474884, "clip_ratio/low_min": 1.7477628716733307e-05, "clip_ratio/region_mean": 0.0015364094833785202, "epoch": 5.681049562682215, "grad_norm": 0.18631397187709808, "learning_rate": 7.5e-07, "loss": -0.0052, "step": 553 }, { "clip_ratio/high_max": 0.0024525459739379585, "clip_ratio/high_mean": 0.0009680784414740629, "clip_ratio/low_mean": 0.000648213497697725, "clip_ratio/low_min": 1.2833676009904593e-05, "clip_ratio/region_mean": 0.001616291941900272, "epoch": 5.690379008746356, "grad_norm": 0.1294698864221573, "learning_rate": 7.5e-07, "loss": -0.014, "step": 554 }, { "clip_ratio/high_max": 0.002274360966111999, "clip_ratio/high_mean": 0.0008175483235390857, "clip_ratio/low_mean": 0.000844893031171523, "clip_ratio/low_min": 2.2461814296548255e-05, "clip_ratio/region_mean": 0.001662441347434651, "epoch": 5.699708454810495, "grad_norm": 0.1542716771364212, "learning_rate": 7.5e-07, "loss": 0.011, "step": 555 }, { "clip_ratio/high_max": 0.002149791114788968, "clip_ratio/high_mean": 0.0008412498118559597, "clip_ratio/low_mean": 0.0008322192607010948, "clip_ratio/low_min": 4.729989268525969e-05, "clip_ratio/region_mean": 0.0016734690871089697, "epoch": 5.709037900874636, "grad_norm": 0.17362697422504425, "learning_rate": 7.5e-07, "loss": 0.0021, "step": 556 }, { "clip_ratio/high_max": 0.0022520429629366845, "clip_ratio/high_mean": 0.0010038840991910547, "clip_ratio/low_mean": 0.0007930289002615609, "clip_ratio/low_min": 2.97522483378998e-05, "clip_ratio/region_mean": 0.0017969130640267394, "epoch": 5.718367346938775, "grad_norm": 0.1421990990638733, "learning_rate": 7.5e-07, "loss": -0.0679, "step": 557 }, { "clip_ratio/high_max": 0.001958798799023498, "clip_ratio/high_mean": 0.0007906637965788832, "clip_ratio/low_mean": 0.0007618542676937068, "clip_ratio/low_min": 1.0368281436967663e-05, "clip_ratio/region_mean": 0.00155251806427259, "epoch": 5.727696793002916, "grad_norm": 0.1543121635913849, "learning_rate": 7.5e-07, "loss": 0.0185, "step": 558 }, { "clip_ratio/high_max": 0.0019684868020704016, "clip_ratio/high_mean": 0.0008261296334239887, "clip_ratio/low_mean": 0.0008153286198648857, "clip_ratio/low_min": 4.4595455619855784e-05, "clip_ratio/region_mean": 0.0016414582933066413, "epoch": 5.737026239067055, "grad_norm": 0.1354813277721405, "learning_rate": 7.5e-07, "loss": 0.002, "step": 559 }, { "clip_ratio/high_max": 0.0019232395570725203, "clip_ratio/high_mean": 0.0008211987496906659, "clip_ratio/low_mean": 0.0009517913131276146, "clip_ratio/low_min": 8.724151030037319e-05, "clip_ratio/region_mean": 0.0017729900500853546, "epoch": 5.746355685131196, "grad_norm": 0.1533970981836319, "learning_rate": 7.5e-07, "loss": 0.0216, "step": 560 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0387137276785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 712.2650146484375, "completions/mean_terminated_length": 575.992431640625, "completions/min_length": 102.0, "completions/min_terminated_length": 102.0, "epoch": 5.755685131195335, "grad_norm": 0.15179075300693512, "learning_rate": 7.5e-07, "loss": -0.0322, "num_tokens": 337464341.0, "reward": 0.6335100531578064, "reward_std": 0.15934942662715912, "rewards/simpleverify_reward/mean": 0.6335100531578064, "rewards/simpleverify_reward/std": 0.48186230659484863, "step": 561 }, { "clip_ratio/high_max": 0.0021215183587628417, "clip_ratio/high_mean": 0.0007357240501733031, "clip_ratio/low_mean": 0.00042237881189066684, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011581028775253799, "epoch": 5.765014577259475, "grad_norm": 0.14836333692073822, "learning_rate": 7.5e-07, "loss": -0.025, "step": 562 }, { "clip_ratio/high_max": 0.0017979169606405776, "clip_ratio/high_mean": 0.0007877040807215963, "clip_ratio/low_mean": 0.0005388262379710795, "clip_ratio/low_min": 1.3790820958092809e-05, "clip_ratio/region_mean": 0.0013265303132357076, "epoch": 5.774344023323615, "grad_norm": 0.14841952919960022, "learning_rate": 7.5e-07, "loss": -0.0031, "step": 563 }, { "clip_ratio/high_max": 0.002140020136721432, "clip_ratio/high_mean": 0.0008108326874207705, "clip_ratio/low_mean": 0.0005231303994150949, "clip_ratio/low_min": 1.1675695532176178e-05, "clip_ratio/region_mean": 0.001333963060460519, "epoch": 5.783673469387755, "grad_norm": 0.12047097086906433, "learning_rate": 7.5e-07, "loss": -0.0316, "step": 564 }, { "clip_ratio/high_max": 0.002095599702442996, "clip_ratio/high_mean": 0.0007320838467421709, "clip_ratio/low_mean": 0.0004693112823588308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012013951236440334, "epoch": 5.793002915451895, "grad_norm": 0.14276212453842163, "learning_rate": 7.5e-07, "loss": -0.0021, "step": 565 }, { "clip_ratio/high_max": 0.0018879740237025544, "clip_ratio/high_mean": 0.0007188997897173977, "clip_ratio/low_mean": 0.0004658610205297009, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011847608220705297, "epoch": 5.802332361516035, "grad_norm": 0.12351705878973007, "learning_rate": 7.5e-07, "loss": -0.0097, "step": 566 }, { "clip_ratio/high_max": 0.0017424239813408349, "clip_ratio/high_mean": 0.0006896389240864664, "clip_ratio/low_mean": 0.0005613826078842976, "clip_ratio/low_min": 3.4578148188302293e-05, "clip_ratio/region_mean": 0.001251021556527121, "epoch": 5.811661807580175, "grad_norm": 0.12631390988826752, "learning_rate": 7.5e-07, "loss": 0.0038, "step": 567 }, { "clip_ratio/high_max": 0.0021470280626090243, "clip_ratio/high_mean": 0.0007829938822396798, "clip_ratio/low_mean": 0.0005467937271532719, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013297876321303193, "epoch": 5.820991253644315, "grad_norm": 0.14081837236881256, "learning_rate": 7.5e-07, "loss": -0.0206, "step": 568 }, { "clip_ratio/high_max": 0.0018999095118488185, "clip_ratio/high_mean": 0.0007353512901318027, "clip_ratio/low_mean": 0.0005705691955881775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001305920464801602, "epoch": 5.830320699708455, "grad_norm": 0.14431434869766235, "learning_rate": 7.5e-07, "loss": 0.0019, "step": 569 }, { "clip_ratio/high_max": 0.001954229850525735, "clip_ratio/high_mean": 0.0008313549842569046, "clip_ratio/low_mean": 0.0006642581538471859, "clip_ratio/low_min": 1.5858919141464867e-05, "clip_ratio/region_mean": 0.0014956131162762176, "epoch": 5.839650145772595, "grad_norm": 0.1539442241191864, "learning_rate": 7.5e-07, "loss": 0.0245, "step": 570 }, { "clip_ratio/high_max": 0.0018279430805705488, "clip_ratio/high_mean": 0.0007081842941261129, "clip_ratio/low_mean": 0.0006180423170007998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013262266038509551, "epoch": 5.848979591836734, "grad_norm": 0.1299973875284195, "learning_rate": 7.5e-07, "loss": 0.0346, "step": 571 }, { "clip_ratio/high_max": 0.0020398747001308948, "clip_ratio/high_mean": 0.0007798960868967697, "clip_ratio/low_mean": 0.0006698565484839492, "clip_ratio/low_min": 1.3244331057649106e-05, "clip_ratio/region_mean": 0.0014497526062768884, "epoch": 5.858309037900875, "grad_norm": 0.136127769947052, "learning_rate": 7.5e-07, "loss": -0.0034, "step": 572 }, { "clip_ratio/high_max": 0.001978742831852287, "clip_ratio/high_mean": 0.0007647985385119682, "clip_ratio/low_mean": 0.0007077274894982111, "clip_ratio/low_min": 3.733014818863012e-05, "clip_ratio/region_mean": 0.0014725260298291687, "epoch": 5.867638483965014, "grad_norm": 0.13378772139549255, "learning_rate": 7.5e-07, "loss": 0.0146, "step": 573 }, { "clip_ratio/high_max": 0.002071208924462553, "clip_ratio/high_mean": 0.0007814205291651888, "clip_ratio/low_mean": 0.0007307590985874413, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015121796241146512, "epoch": 5.876967930029155, "grad_norm": 0.14835326373577118, "learning_rate": 7.5e-07, "loss": -0.0382, "step": 574 }, { "clip_ratio/high_max": 0.002126071496604709, "clip_ratio/high_mean": 0.0007972416624397738, "clip_ratio/low_mean": 0.0006887358176754788, "clip_ratio/low_min": 1.5006002286099829e-05, "clip_ratio/region_mean": 0.0014859774964861572, "epoch": 5.886297376093294, "grad_norm": 0.1280345916748047, "learning_rate": 7.5e-07, "loss": -0.0179, "step": 575 }, { "clip_ratio/high_max": 0.002218440713477321, "clip_ratio/high_mean": 0.0008852649007167201, "clip_ratio/low_mean": 0.000641814743175928, "clip_ratio/low_min": 1.3371844033827074e-05, "clip_ratio/region_mean": 0.0015270796429831535, "epoch": 5.895626822157435, "grad_norm": 0.14455702900886536, "learning_rate": 7.5e-07, "loss": -0.0383, "step": 576 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0387137276785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 711.6426391601562, "completions/mean_terminated_length": 575.3449096679688, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 6.0093294460641395, "grad_norm": 0.14437825977802277, "learning_rate": 7.5e-07, "loss": -0.035, "num_tokens": 346824377.0, "reward": 0.6278599500656128, "reward_std": 0.16975037753582, "rewards/simpleverify_reward/mean": 0.6278599500656128, "rewards/simpleverify_reward/std": 0.48339226841926575, "step": 577 }, { "clip_ratio/high_max": 0.0017327860623481683, "clip_ratio/high_mean": 0.0007291922520380467, "clip_ratio/low_mean": 0.0005461472364913789, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001275339505809825, "epoch": 6.01865889212828, "grad_norm": 0.14812950789928436, "learning_rate": 7.5e-07, "loss": -0.0245, "step": 578 }, { "clip_ratio/high_max": 0.0017225505180249456, "clip_ratio/high_mean": 0.0007148690547182923, "clip_ratio/low_mean": 0.00047223797628248576, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011871070273627993, "epoch": 6.0279883381924195, "grad_norm": 0.13332140445709229, "learning_rate": 7.5e-07, "loss": -0.0373, "step": 579 }, { "clip_ratio/high_max": 0.002061446852167137, "clip_ratio/high_mean": 0.0007735830968158552, "clip_ratio/low_mean": 0.0006678274912701454, "clip_ratio/low_min": 4.831096066482132e-05, "clip_ratio/region_mean": 0.0014414105935429689, "epoch": 6.03731778425656, "grad_norm": 0.14159540832042694, "learning_rate": 7.5e-07, "loss": -0.0005, "step": 580 }, { "clip_ratio/high_max": 0.0018177512829424813, "clip_ratio/high_mean": 0.0006948841619305313, "clip_ratio/low_mean": 0.0005009868777960946, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011958710383623838, "epoch": 6.0466472303206995, "grad_norm": 0.13179261982440948, "learning_rate": 7.5e-07, "loss": 0.0156, "step": 581 }, { "clip_ratio/high_max": 0.0020604070523404516, "clip_ratio/high_mean": 0.0008056612023210619, "clip_ratio/low_mean": 0.000624629507001373, "clip_ratio/low_min": 2.7813511223939713e-05, "clip_ratio/region_mean": 0.0014302907075034454, "epoch": 6.05597667638484, "grad_norm": 0.1463959813117981, "learning_rate": 7.5e-07, "loss": -0.0192, "step": 582 }, { "clip_ratio/high_max": 0.0016208306515181903, "clip_ratio/high_mean": 0.0006328521230898332, "clip_ratio/low_mean": 0.0006042219574737828, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012370740587357432, "epoch": 6.0653061224489795, "grad_norm": 0.1539170742034912, "learning_rate": 7.5e-07, "loss": 0.0393, "step": 583 }, { "clip_ratio/high_max": 0.00217858290852746, "clip_ratio/high_mean": 0.0007685676391702145, "clip_ratio/low_mean": 0.0006676905341009842, "clip_ratio/low_min": 2.4826216758810915e-05, "clip_ratio/region_mean": 0.0014362582078319974, "epoch": 6.07463556851312, "grad_norm": 0.1532258689403534, "learning_rate": 7.5e-07, "loss": 0.0212, "step": 584 }, { "clip_ratio/high_max": 0.0020631929219234735, "clip_ratio/high_mean": 0.0007434409881170723, "clip_ratio/low_mean": 0.0006320503143797396, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001375491305225296, "epoch": 6.0839650145772595, "grad_norm": 0.15775316953659058, "learning_rate": 7.5e-07, "loss": -0.015, "step": 585 }, { "clip_ratio/high_max": 0.0020389468845678493, "clip_ratio/high_mean": 0.0008200324573408579, "clip_ratio/low_mean": 0.0006819217669544742, "clip_ratio/low_min": 2.8988868507440202e-05, "clip_ratio/region_mean": 0.0015019542406662367, "epoch": 6.093294460641399, "grad_norm": 0.16255685687065125, "learning_rate": 7.5e-07, "loss": -0.0186, "step": 586 }, { "clip_ratio/high_max": 0.0018253019552503247, "clip_ratio/high_mean": 0.0007422212584060617, "clip_ratio/low_mean": 0.0006493696255347459, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013915908930357546, "epoch": 6.1026239067055394, "grad_norm": 0.14404422044754028, "learning_rate": 7.5e-07, "loss": -0.029, "step": 587 }, { "clip_ratio/high_max": 0.0019375060473976191, "clip_ratio/high_mean": 0.0007072245025483426, "clip_ratio/low_mean": 0.000722469563697814, "clip_ratio/low_min": 4.3851343434653245e-05, "clip_ratio/region_mean": 0.001429694049875252, "epoch": 6.111953352769679, "grad_norm": 0.14755015075206757, "learning_rate": 7.5e-07, "loss": 0.0245, "step": 588 }, { "clip_ratio/high_max": 0.0020318450333434157, "clip_ratio/high_mean": 0.0008408126213907963, "clip_ratio/low_mean": 0.0006301839093794115, "clip_ratio/low_min": 2.987578591273632e-05, "clip_ratio/region_mean": 0.0014709965216752607, "epoch": 6.121282798833819, "grad_norm": 0.14266759157180786, "learning_rate": 7.5e-07, "loss": -0.0148, "step": 589 }, { "clip_ratio/high_max": 0.001842397283326136, "clip_ratio/high_mean": 0.0008213844412239268, "clip_ratio/low_mean": 0.0007044906124065164, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015258749881468248, "epoch": 6.130612244897959, "grad_norm": 0.13525165617465973, "learning_rate": 7.5e-07, "loss": 0.0118, "step": 590 }, { "clip_ratio/high_max": 0.002040315368503798, "clip_ratio/high_mean": 0.0008640420692245243, "clip_ratio/low_mean": 0.0008129297129926272, "clip_ratio/low_min": 3.194691362295998e-05, "clip_ratio/region_mean": 0.0016769718058640137, "epoch": 6.139941690962099, "grad_norm": 0.15187829732894897, "learning_rate": 7.5e-07, "loss": 0.0113, "step": 591 }, { "clip_ratio/high_max": 0.002435842739942018, "clip_ratio/high_mean": 0.0009937034192262217, "clip_ratio/low_mean": 0.0005633812866108201, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015570846990158316, "epoch": 6.149271137026239, "grad_norm": 0.13633862137794495, "learning_rate": 7.5e-07, "loss": -0.0616, "step": 592 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.041224888392857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4070.0, "completions/mean_length": 717.737060546875, "completions/mean_terminated_length": 572.4802856445312, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 6.158600583090379, "grad_norm": 0.1618008315563202, "learning_rate": 7.5e-07, "loss": -0.013, "num_tokens": 356131143.0, "reward": 0.6270926594734192, "reward_std": 0.1608853042125702, "rewards/simpleverify_reward/mean": 0.6270926594734192, "rewards/simpleverify_reward/std": 0.4835946261882782, "step": 593 }, { "clip_ratio/high_max": 0.0016918488581723068, "clip_ratio/high_mean": 0.0006694685034744907, "clip_ratio/low_mean": 0.0004938773281537578, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011633458125288598, "epoch": 6.167930029154519, "grad_norm": 0.1327877789735794, "learning_rate": 7.5e-07, "loss": -0.004, "step": 594 }, { "clip_ratio/high_max": 0.0018197697208961472, "clip_ratio/high_mean": 0.000697474300977774, "clip_ratio/low_mean": 0.0004399873741931515, "clip_ratio/low_min": 2.088205837935675e-05, "clip_ratio/region_mean": 0.0011374616624379996, "epoch": 6.1772594752186585, "grad_norm": 0.14326748251914978, "learning_rate": 7.5e-07, "loss": 0.0139, "step": 595 }, { "clip_ratio/high_max": 0.00169919944164576, "clip_ratio/high_mean": 0.0007179354179243091, "clip_ratio/low_mean": 0.0004236135343944625, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011415489461796824, "epoch": 6.186588921282799, "grad_norm": 0.13177576661109924, "learning_rate": 7.5e-07, "loss": -0.0193, "step": 596 }, { "clip_ratio/high_max": 0.0016963764210231602, "clip_ratio/high_mean": 0.0006889189717185218, "clip_ratio/low_mean": 0.0005204114263506199, "clip_ratio/low_min": 2.262990255985642e-05, "clip_ratio/region_mean": 0.0012093304339941824, "epoch": 6.1959183673469385, "grad_norm": 0.1305747777223587, "learning_rate": 7.5e-07, "loss": -0.0084, "step": 597 }, { "clip_ratio/high_max": 0.0017902054823935032, "clip_ratio/high_mean": 0.0008011709614947904, "clip_ratio/low_mean": 0.0005540434303838992, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013552143791457638, "epoch": 6.205247813411079, "grad_norm": 0.1440616399049759, "learning_rate": 7.5e-07, "loss": -0.0241, "step": 598 }, { "clip_ratio/high_max": 0.002108467509970069, "clip_ratio/high_mean": 0.000838946290059539, "clip_ratio/low_mean": 0.0005850105017088936, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014239567426557187, "epoch": 6.214577259475218, "grad_norm": 0.14234045147895813, "learning_rate": 7.5e-07, "loss": -0.0437, "step": 599 }, { "clip_ratio/high_max": 0.002020923733653035, "clip_ratio/high_mean": 0.0008571628077334026, "clip_ratio/low_mean": 0.0005715294037145213, "clip_ratio/low_min": 2.7015345040126704e-05, "clip_ratio/region_mean": 0.0014286922014434822, "epoch": 6.223906705539359, "grad_norm": 0.14921830594539642, "learning_rate": 7.5e-07, "loss": -0.0378, "step": 600 }, { "clip_ratio/high_max": 0.0019420422322582453, "clip_ratio/high_mean": 0.0006686542765237391, "clip_ratio/low_mean": 0.0006705219084324199, "clip_ratio/low_min": 5.7539788031135686e-05, "clip_ratio/region_mean": 0.0013391761858656537, "epoch": 6.233236151603498, "grad_norm": 0.1587226539850235, "learning_rate": 7.5e-07, "loss": 0.017, "step": 601 }, { "clip_ratio/high_max": 0.0017153744338429533, "clip_ratio/high_mean": 0.0006641166655754205, "clip_ratio/low_mean": 0.0005222271302045556, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011863437939609867, "epoch": 6.242565597667639, "grad_norm": 0.12741662561893463, "learning_rate": 7.5e-07, "loss": -0.0124, "step": 602 }, { "clip_ratio/high_max": 0.0020537403142952826, "clip_ratio/high_mean": 0.0007519024384237127, "clip_ratio/low_mean": 0.0006348693550535245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001386771778925322, "epoch": 6.251895043731778, "grad_norm": 0.14960499107837677, "learning_rate": 7.5e-07, "loss": -0.0151, "step": 603 }, { "clip_ratio/high_max": 0.001797732307750266, "clip_ratio/high_mean": 0.0007773291945341043, "clip_ratio/low_mean": 0.0006093860047258204, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013867152301827446, "epoch": 6.261224489795918, "grad_norm": 0.14696171879768372, "learning_rate": 7.5e-07, "loss": -0.039, "step": 604 }, { "clip_ratio/high_max": 0.0017433667162549682, "clip_ratio/high_mean": 0.000707363047695253, "clip_ratio/low_mean": 0.00070553730984102, "clip_ratio/low_min": 4.731191984319594e-05, "clip_ratio/region_mean": 0.0014129003247944638, "epoch": 6.270553935860058, "grad_norm": 0.1731572449207306, "learning_rate": 7.5e-07, "loss": -0.0113, "step": 605 }, { "clip_ratio/high_max": 0.0021051324147265404, "clip_ratio/high_mean": 0.0008289777470054105, "clip_ratio/low_mean": 0.0007024840379017405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015314617958210874, "epoch": 6.279883381924198, "grad_norm": 0.1469292789697647, "learning_rate": 7.5e-07, "loss": -0.0349, "step": 606 }, { "clip_ratio/high_max": 0.001608591410331428, "clip_ratio/high_mean": 0.0006413010523829143, "clip_ratio/low_mean": 0.0007223418087960454, "clip_ratio/low_min": 2.6334832000429742e-05, "clip_ratio/region_mean": 0.0013636428557219915, "epoch": 6.289212827988338, "grad_norm": 0.15991683304309845, "learning_rate": 7.5e-07, "loss": 0.0189, "step": 607 }, { "clip_ratio/high_max": 0.002046465247985907, "clip_ratio/high_mean": 0.0007549554939032532, "clip_ratio/low_mean": 0.0006800533428759081, "clip_ratio/low_min": 1.1846095731016248e-05, "clip_ratio/region_mean": 0.0014350088458741084, "epoch": 6.298542274052478, "grad_norm": 0.14413142204284668, "learning_rate": 7.5e-07, "loss": 0.0062, "step": 608 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0468052455357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4051.0, "completions/mean_length": 753.8692626953125, "completions/mean_terminated_length": 589.7587280273438, "completions/min_length": 122.0, "completions/min_terminated_length": 122.0, "epoch": 6.307871720116618, "grad_norm": 0.14510147273540497, "learning_rate": 7.5e-07, "loss": -0.0222, "num_tokens": 365617708.0, "reward": 0.6273019313812256, "reward_std": 0.17144402861595154, "rewards/simpleverify_reward/mean": 0.6273018717765808, "rewards/simpleverify_reward/std": 0.4835395812988281, "step": 609 }, { "clip_ratio/high_max": 0.0018875946261687204, "clip_ratio/high_mean": 0.0008046406583162025, "clip_ratio/low_mean": 0.0004819231571673299, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012865638527728152, "epoch": 6.317201166180758, "grad_norm": 0.13272511959075928, "learning_rate": 7.5e-07, "loss": -0.0549, "step": 610 }, { "clip_ratio/high_max": 0.0016824228077894077, "clip_ratio/high_mean": 0.0007127084354578983, "clip_ratio/low_mean": 0.0005097550802020123, "clip_ratio/low_min": 1.7519270841148682e-05, "clip_ratio/region_mean": 0.0012224635465827305, "epoch": 6.326530612244898, "grad_norm": 0.16274964809417725, "learning_rate": 7.5e-07, "loss": 0.0085, "step": 611 }, { "clip_ratio/high_max": 0.0017428358769393526, "clip_ratio/high_mean": 0.0007596647883474361, "clip_ratio/low_mean": 0.0005610150155916926, "clip_ratio/low_min": 4.7826699301367626e-05, "clip_ratio/region_mean": 0.0013206798466853797, "epoch": 6.335860058309038, "grad_norm": 0.13701848685741425, "learning_rate": 7.5e-07, "loss": -0.0414, "step": 612 }, { "clip_ratio/high_max": 0.0017871666750579607, "clip_ratio/high_mean": 0.0006746828539689886, "clip_ratio/low_mean": 0.0005811996879856451, "clip_ratio/low_min": 2.357496941840509e-05, "clip_ratio/region_mean": 0.0012558825401356444, "epoch": 6.345189504373177, "grad_norm": 0.13143177330493927, "learning_rate": 7.5e-07, "loss": -0.0053, "step": 613 }, { "clip_ratio/high_max": 0.0017042807048710529, "clip_ratio/high_mean": 0.0006790051520511042, "clip_ratio/low_mean": 0.0005468308254421572, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012258359602128621, "epoch": 6.354518950437318, "grad_norm": 0.14682742953300476, "learning_rate": 7.5e-07, "loss": -0.0356, "step": 614 }, { "clip_ratio/high_max": 0.0019391291934880428, "clip_ratio/high_mean": 0.0007530369148298632, "clip_ratio/low_mean": 0.0005582143542142148, "clip_ratio/low_min": 1.2680056897806935e-05, "clip_ratio/region_mean": 0.0013112512897350825, "epoch": 6.363848396501457, "grad_norm": 0.13666421175003052, "learning_rate": 7.5e-07, "loss": -0.0192, "step": 615 }, { "clip_ratio/high_max": 0.0018203500439994968, "clip_ratio/high_mean": 0.0007181194596341811, "clip_ratio/low_mean": 0.0006861815745651256, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014043010523892008, "epoch": 6.373177842565598, "grad_norm": 0.13893982768058777, "learning_rate": 7.5e-07, "loss": -0.0018, "step": 616 }, { "clip_ratio/high_max": 0.0019293480818305397, "clip_ratio/high_mean": 0.0007691737118875608, "clip_ratio/low_mean": 0.0007242798155857599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014934535429347306, "epoch": 6.382507288629737, "grad_norm": 0.16041231155395508, "learning_rate": 7.5e-07, "loss": -0.0082, "step": 617 }, { "clip_ratio/high_max": 0.001849303844210226, "clip_ratio/high_mean": 0.000785755399192567, "clip_ratio/low_mean": 0.0006856367072032299, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014713921191287227, "epoch": 6.391836734693878, "grad_norm": 0.15051783621311188, "learning_rate": 7.5e-07, "loss": -0.0296, "step": 618 }, { "clip_ratio/high_max": 0.0020765578592545353, "clip_ratio/high_mean": 0.0008947478963818867, "clip_ratio/low_mean": 0.000678559226798825, "clip_ratio/low_min": 2.4314334950759076e-05, "clip_ratio/region_mean": 0.0015733071122667752, "epoch": 6.401166180758017, "grad_norm": 0.15413621068000793, "learning_rate": 7.5e-07, "loss": -0.0363, "step": 619 }, { "clip_ratio/high_max": 0.0018161916050303262, "clip_ratio/high_mean": 0.0008140516383718932, "clip_ratio/low_mean": 0.0008345760998054175, "clip_ratio/low_min": 5.140257053426467e-05, "clip_ratio/region_mean": 0.0016486277381773107, "epoch": 6.410495626822158, "grad_norm": 0.16109833121299744, "learning_rate": 7.5e-07, "loss": 0.0078, "step": 620 }, { "clip_ratio/high_max": 0.0018495932017685845, "clip_ratio/high_mean": 0.0007879464101279154, "clip_ratio/low_mean": 0.0007538515765190823, "clip_ratio/low_min": 6.252438743103994e-05, "clip_ratio/region_mean": 0.0015417979666381143, "epoch": 6.419825072886297, "grad_norm": 0.1499427706003189, "learning_rate": 7.5e-07, "loss": -0.0007, "step": 621 }, { "clip_ratio/high_max": 0.0016265619560726918, "clip_ratio/high_mean": 0.0006227475660125492, "clip_ratio/low_mean": 0.000756420282414183, "clip_ratio/low_min": 1.2235708709340543e-05, "clip_ratio/region_mean": 0.0013791678720735945, "epoch": 6.429154518950437, "grad_norm": 0.12825413048267365, "learning_rate": 7.5e-07, "loss": 0.0218, "step": 622 }, { "clip_ratio/high_max": 0.0019666071166284382, "clip_ratio/high_mean": 0.0008720048226678045, "clip_ratio/low_mean": 0.0008394651231355965, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001711469900328666, "epoch": 6.438483965014577, "grad_norm": 0.15256235003471375, "learning_rate": 7.5e-07, "loss": -0.0103, "step": 623 }, { "clip_ratio/high_max": 0.0023034227851894684, "clip_ratio/high_mean": 0.0009735247131175129, "clip_ratio/low_mean": 0.0006682452076347545, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016417699080193415, "epoch": 6.447813411078717, "grad_norm": 0.15692903101444244, "learning_rate": 7.5e-07, "loss": -0.0603, "step": 624 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0459681919642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 739.6214599609375, "completions/mean_terminated_length": 577.9008178710938, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 6.457142857142857, "grad_norm": 0.15271349251270294, "learning_rate": 7.5e-07, "loss": -0.0078, "num_tokens": 374927217.0, "reward": 0.6380441188812256, "reward_std": 0.1630680412054062, "rewards/simpleverify_reward/mean": 0.6380440592765808, "rewards/simpleverify_reward/std": 0.4805828928947449, "step": 625 }, { "clip_ratio/high_max": 0.002015148915234022, "clip_ratio/high_mean": 0.0008268495403171983, "clip_ratio/low_mean": 0.00048292772908098414, "clip_ratio/low_min": 9.311680514656473e-06, "clip_ratio/region_mean": 0.0013097772825858556, "epoch": 6.466472303206997, "grad_norm": 0.16737298667430878, "learning_rate": 7.5e-07, "loss": -0.0519, "step": 626 }, { "clip_ratio/high_max": 0.001983954309253022, "clip_ratio/high_mean": 0.0007367752277787076, "clip_ratio/low_mean": 0.0005404947642091429, "clip_ratio/low_min": 3.197982550773304e-05, "clip_ratio/region_mean": 0.0012772699956258293, "epoch": 6.475801749271137, "grad_norm": 0.14042143523693085, "learning_rate": 7.5e-07, "loss": -0.001, "step": 627 }, { "clip_ratio/high_max": 0.0014868882426526397, "clip_ratio/high_mean": 0.0006278407490754034, "clip_ratio/low_mean": 0.0005912371661906946, "clip_ratio/low_min": 2.6702877221396193e-05, "clip_ratio/region_mean": 0.0012190779343654867, "epoch": 6.485131195335277, "grad_norm": 0.1444530487060547, "learning_rate": 7.5e-07, "loss": 0.0208, "step": 628 }, { "clip_ratio/high_max": 0.0020371876562421676, "clip_ratio/high_mean": 0.0008832373223413015, "clip_ratio/low_mean": 0.0005282158917907509, "clip_ratio/low_min": 1.577486182213761e-05, "clip_ratio/region_mean": 0.0014114532059466, "epoch": 6.494460641399417, "grad_norm": 0.15208545327186584, "learning_rate": 7.5e-07, "loss": -0.0043, "step": 629 }, { "clip_ratio/high_max": 0.0017727448903315235, "clip_ratio/high_mean": 0.0006916831616763375, "clip_ratio/low_mean": 0.00045784485064359615, "clip_ratio/low_min": 1.0435798685648479e-05, "clip_ratio/region_mean": 0.0011495280123199336, "epoch": 6.503790087463557, "grad_norm": 0.12699995934963226, "learning_rate": 7.5e-07, "loss": -0.0242, "step": 630 }, { "clip_ratio/high_max": 0.0018553467743913643, "clip_ratio/high_mean": 0.000768099936976796, "clip_ratio/low_mean": 0.0005642463802359998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013323463354026899, "epoch": 6.513119533527696, "grad_norm": 0.14404644072055817, "learning_rate": 7.5e-07, "loss": 0.0251, "step": 631 }, { "clip_ratio/high_max": 0.0016864517419890035, "clip_ratio/high_mean": 0.0007302408912437386, "clip_ratio/low_mean": 0.0005873899731341226, "clip_ratio/low_min": 1.5858919141464867e-05, "clip_ratio/region_mean": 0.0013176308566471562, "epoch": 6.522448979591837, "grad_norm": 0.1489955335855484, "learning_rate": 7.5e-07, "loss": -0.0173, "step": 632 }, { "clip_ratio/high_max": 0.0018859826086554676, "clip_ratio/high_mean": 0.0008295598890981637, "clip_ratio/low_mean": 0.000745209990782314, "clip_ratio/low_min": 2.133105772372801e-05, "clip_ratio/region_mean": 0.001574769881699467, "epoch": 6.531778425655976, "grad_norm": 0.15356360375881195, "learning_rate": 7.5e-07, "loss": 0.0187, "step": 633 }, { "clip_ratio/high_max": 0.0017588610098755453, "clip_ratio/high_mean": 0.0007124961393856211, "clip_ratio/low_mean": 0.0005540206739169662, "clip_ratio/low_min": 1.2831041203753557e-05, "clip_ratio/region_mean": 0.00126651685059187, "epoch": 6.541107871720117, "grad_norm": 0.13636483252048492, "learning_rate": 7.5e-07, "loss": -0.029, "step": 634 }, { "clip_ratio/high_max": 0.0018659069173736498, "clip_ratio/high_mean": 0.0008191129436454503, "clip_ratio/low_mean": 0.0005369201926441747, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013560331244661938, "epoch": 6.550437317784256, "grad_norm": 0.13648338615894318, "learning_rate": 7.5e-07, "loss": -0.034, "step": 635 }, { "clip_ratio/high_max": 0.0022533312658197246, "clip_ratio/high_mean": 0.0008111231873044744, "clip_ratio/low_mean": 0.00059922245600319, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014103456451266538, "epoch": 6.559766763848397, "grad_norm": 823.8951416015625, "learning_rate": 7.5e-07, "loss": -0.013, "step": 636 }, { "clip_ratio/high_max": 0.0021348805021261796, "clip_ratio/high_mean": 0.0008478036725136917, "clip_ratio/low_mean": 0.0005656686362272012, "clip_ratio/low_min": 3.016292703250656e-05, "clip_ratio/region_mean": 0.0014134722987364512, "epoch": 6.569096209912536, "grad_norm": 0.1411423236131668, "learning_rate": 7.5e-07, "loss": -0.0244, "step": 637 }, { "clip_ratio/high_max": 0.0020692730686278082, "clip_ratio/high_mean": 0.0007561859729321441, "clip_ratio/low_mean": 0.0007480166568711866, "clip_ratio/low_min": 4.371939485281473e-05, "clip_ratio/region_mean": 0.00150420263162232, "epoch": 6.578425655976677, "grad_norm": 0.14228340983390808, "learning_rate": 7.5e-07, "loss": 0.0049, "step": 638 }, { "clip_ratio/high_max": 0.001970432189409621, "clip_ratio/high_mean": 0.000804665465693688, "clip_ratio/low_mean": 0.0007714284674875671, "clip_ratio/low_min": 7.276593350979965e-05, "clip_ratio/region_mean": 0.0015760939786559902, "epoch": 6.587755102040816, "grad_norm": 0.14025475084781647, "learning_rate": 7.5e-07, "loss": -0.0138, "step": 639 }, { "clip_ratio/high_max": 0.0022830161251476966, "clip_ratio/high_mean": 0.0008736449817661196, "clip_ratio/low_mean": 0.0006079675931687234, "clip_ratio/low_min": 1.74143224285217e-05, "clip_ratio/region_mean": 0.001481612602219684, "epoch": 6.597084548104956, "grad_norm": 0.1381550282239914, "learning_rate": 7.5e-07, "loss": -0.0126, "step": 640 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.04638671875, "completions/max_length": 4096.0, "completions/max_terminated_length": 3984.0, "completions/mean_length": 738.9176025390625, "completions/mean_terminated_length": 575.6185913085938, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 6.606413994169096, "grad_norm": 0.15690408647060394, "learning_rate": 7.5e-07, "loss": -0.0266, "num_tokens": 384263771.0, "reward": 0.6283482313156128, "reward_std": 0.16640058159828186, "rewards/simpleverify_reward/mean": 0.6283482313156128, "rewards/simpleverify_reward/std": 0.48326289653778076, "step": 641 }, { "clip_ratio/high_max": 0.00199780391267268, "clip_ratio/high_mean": 0.0008489883948641364, "clip_ratio/low_mean": 0.0004754136609790294, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001324402073805686, "epoch": 6.615743440233236, "grad_norm": 0.14223334193229675, "learning_rate": 7.5e-07, "loss": -0.0555, "step": 642 }, { "clip_ratio/high_max": 0.0017222353963006753, "clip_ratio/high_mean": 0.0006984553183428943, "clip_ratio/low_mean": 0.0005159888651178335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001214444186189212, "epoch": 6.625072886297376, "grad_norm": 0.13556860387325287, "learning_rate": 7.5e-07, "loss": -0.0059, "step": 643 }, { "clip_ratio/high_max": 0.0020858568968833424, "clip_ratio/high_mean": 0.0007711522684985539, "clip_ratio/low_mean": 0.0004666265413106885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012377788298181258, "epoch": 6.634402332361516, "grad_norm": 0.13091479241847992, "learning_rate": 7.5e-07, "loss": -0.048, "step": 644 }, { "clip_ratio/high_max": 0.0015320665515901055, "clip_ratio/high_mean": 0.0006400903730536811, "clip_ratio/low_mean": 0.0005496188387041911, "clip_ratio/low_min": 1.5439723938470706e-05, "clip_ratio/region_mean": 0.0011897092190338299, "epoch": 6.643731778425656, "grad_norm": 0.21802809834480286, "learning_rate": 7.5e-07, "loss": -0.0094, "step": 645 }, { "clip_ratio/high_max": 0.0016357160056941211, "clip_ratio/high_mean": 0.0006113899235060671, "clip_ratio/low_mean": 0.0006127896585894632, "clip_ratio/low_min": 1.3504753951565363e-05, "clip_ratio/region_mean": 0.0012241795884619933, "epoch": 6.653061224489796, "grad_norm": 0.11883927881717682, "learning_rate": 7.5e-07, "loss": 0.025, "step": 646 }, { "clip_ratio/high_max": 0.001807776225177804, "clip_ratio/high_mean": 0.0006397325978468871, "clip_ratio/low_mean": 0.0005864945433131652, "clip_ratio/low_min": 4.22753628299688e-05, "clip_ratio/region_mean": 0.0012262271484360099, "epoch": 6.662390670553936, "grad_norm": 0.15073582530021667, "learning_rate": 7.5e-07, "loss": 0.0107, "step": 647 }, { "clip_ratio/high_max": 0.0018039499773294665, "clip_ratio/high_mean": 0.0007175396876846207, "clip_ratio/low_mean": 0.0006909687381266849, "clip_ratio/low_min": 2.6477440769667737e-05, "clip_ratio/region_mean": 0.0014085083930694964, "epoch": 6.671720116618076, "grad_norm": 0.14415042102336884, "learning_rate": 7.5e-07, "loss": 0.0085, "step": 648 }, { "clip_ratio/high_max": 0.001875352900242433, "clip_ratio/high_mean": 0.0007955970449984306, "clip_ratio/low_mean": 0.0006346267787193938, "clip_ratio/low_min": 4.358437945484184e-05, "clip_ratio/region_mean": 0.001430223805073183, "epoch": 6.681049562682215, "grad_norm": 0.15524768829345703, "learning_rate": 7.5e-07, "loss": -0.0034, "step": 649 }, { "clip_ratio/high_max": 0.001972623693291098, "clip_ratio/high_mean": 0.0008767974723014049, "clip_ratio/low_mean": 0.0005820033838972449, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014588008634746075, "epoch": 6.690379008746356, "grad_norm": 0.1478559523820877, "learning_rate": 7.5e-07, "loss": -0.0252, "step": 650 }, { "clip_ratio/high_max": 0.0023275444182218052, "clip_ratio/high_mean": 0.0009320235185441561, "clip_ratio/low_mean": 0.0006440448632929474, "clip_ratio/low_min": 1.7954611394088715e-05, "clip_ratio/region_mean": 0.001576068374561146, "epoch": 6.699708454810495, "grad_norm": 0.15866731107234955, "learning_rate": 7.5e-07, "loss": -0.0195, "step": 651 }, { "clip_ratio/high_max": 0.0019237421583966352, "clip_ratio/high_mean": 0.0007637134149263147, "clip_ratio/low_mean": 0.0007567306347482372, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015204440423985943, "epoch": 6.709037900874636, "grad_norm": 0.15022048354148865, "learning_rate": 7.5e-07, "loss": 0.0309, "step": 652 }, { "clip_ratio/high_max": 0.0019663256316562183, "clip_ratio/high_mean": 0.000846671227918705, "clip_ratio/low_mean": 0.0006282518879743293, "clip_ratio/low_min": 5.3605825087288395e-05, "clip_ratio/region_mean": 0.0014749231268069707, "epoch": 6.718367346938775, "grad_norm": 0.143341064453125, "learning_rate": 7.5e-07, "loss": 0.0007, "step": 653 }, { "clip_ratio/high_max": 0.0020055451823282056, "clip_ratio/high_mean": 0.0008221649550250731, "clip_ratio/low_mean": 0.0007301433824977721, "clip_ratio/low_min": 4.7175078179861885e-05, "clip_ratio/region_mean": 0.0015523083275184035, "epoch": 6.727696793002916, "grad_norm": 0.16908293962478638, "learning_rate": 7.5e-07, "loss": -0.0282, "step": 654 }, { "clip_ratio/high_max": 0.0021823822680744343, "clip_ratio/high_mean": 0.0009132653394772206, "clip_ratio/low_mean": 0.0008196706294256728, "clip_ratio/low_min": 1.5451174476766028e-05, "clip_ratio/region_mean": 0.001732935994368745, "epoch": 6.737026239067055, "grad_norm": 0.15041415393352509, "learning_rate": 7.5e-07, "loss": -0.0148, "step": 655 }, { "clip_ratio/high_max": 0.0019198799673176836, "clip_ratio/high_mean": 0.0008361529507965315, "clip_ratio/low_mean": 0.0007851405389374122, "clip_ratio/low_min": 1.797526601876598e-05, "clip_ratio/region_mean": 0.001621293486095965, "epoch": 6.746355685131196, "grad_norm": 0.18422050774097443, "learning_rate": 7.5e-07, "loss": -0.0051, "step": 656 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.05029296875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4085.0, "completions/mean_length": 762.7794799804688, "completions/mean_terminated_length": 586.2644653320312, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 6.755685131195335, "grad_norm": 0.138418048620224, "learning_rate": 7.5e-07, "loss": 0.0082, "num_tokens": 393682337.0, "reward": 0.6202567219734192, "reward_std": 0.15855827927589417, "rewards/simpleverify_reward/mean": 0.6202567219734192, "rewards/simpleverify_reward/std": 0.48533982038497925, "step": 657 }, { "clip_ratio/high_max": 0.0014160998944134917, "clip_ratio/high_mean": 0.000629902255241177, "clip_ratio/low_mean": 0.00041091716457231087, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010408194139017724, "epoch": 6.765014577259475, "grad_norm": 0.13166925311088562, "learning_rate": 7.5e-07, "loss": -0.0441, "step": 658 }, { "clip_ratio/high_max": 0.001755007600877434, "clip_ratio/high_mean": 0.0006864079587103333, "clip_ratio/low_mean": 0.0005357425034162588, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001222150429384783, "epoch": 6.774344023323615, "grad_norm": 0.1593075543642044, "learning_rate": 7.5e-07, "loss": 0.0005, "step": 659 }, { "clip_ratio/high_max": 0.0019473503016342875, "clip_ratio/high_mean": 0.0008216363639803603, "clip_ratio/low_mean": 0.000477546696401987, "clip_ratio/low_min": 1.2079628504579887e-05, "clip_ratio/region_mean": 0.0012991830590181053, "epoch": 6.783673469387755, "grad_norm": 0.5133810043334961, "learning_rate": 7.5e-07, "loss": -0.0171, "step": 660 }, { "clip_ratio/high_max": 0.0017149955911008874, "clip_ratio/high_mean": 0.0006838672661615419, "clip_ratio/low_mean": 0.0005126951864440343, "clip_ratio/low_min": 2.3773298380547203e-05, "clip_ratio/region_mean": 0.0011965624689764809, "epoch": 6.793002915451895, "grad_norm": 0.1488444209098816, "learning_rate": 7.5e-07, "loss": 0.0031, "step": 661 }, { "clip_ratio/high_max": 0.0018749155278783292, "clip_ratio/high_mean": 0.0006893877507536672, "clip_ratio/low_mean": 0.0005125875441081007, "clip_ratio/low_min": 1.5144172721193172e-05, "clip_ratio/region_mean": 0.0012019752757623792, "epoch": 6.802332361516035, "grad_norm": 0.155256450176239, "learning_rate": 7.5e-07, "loss": -0.0293, "step": 662 }, { "clip_ratio/high_max": 0.001850458596891258, "clip_ratio/high_mean": 0.0008022677930057398, "clip_ratio/low_mean": 0.0005855383988091489, "clip_ratio/low_min": 3.5328382182342466e-05, "clip_ratio/region_mean": 0.0013878062054573093, "epoch": 6.811661807580175, "grad_norm": 0.1462802290916443, "learning_rate": 7.5e-07, "loss": -0.0289, "step": 663 }, { "clip_ratio/high_max": 0.001861911550804507, "clip_ratio/high_mean": 0.0007257436136569595, "clip_ratio/low_mean": 0.0006115326023063972, "clip_ratio/low_min": 1.1386408914404456e-05, "clip_ratio/region_mean": 0.0013372762405197136, "epoch": 6.820991253644315, "grad_norm": 0.1548410803079605, "learning_rate": 7.5e-07, "loss": 0.0034, "step": 664 }, { "clip_ratio/high_max": 0.002215773427451495, "clip_ratio/high_mean": 0.0007448262895195512, "clip_ratio/low_mean": 0.0006522358853544574, "clip_ratio/low_min": 1.0383784683654085e-05, "clip_ratio/region_mean": 0.0013970621912449133, "epoch": 6.830320699708455, "grad_norm": 0.15187279880046844, "learning_rate": 7.5e-07, "loss": 0.0109, "step": 665 }, { "clip_ratio/high_max": 0.0016084734270407353, "clip_ratio/high_mean": 0.0006212003772816388, "clip_ratio/low_mean": 0.0005678457710018847, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011890461573784705, "epoch": 6.839650145772595, "grad_norm": 0.13286980986595154, "learning_rate": 7.5e-07, "loss": -0.0177, "step": 666 }, { "clip_ratio/high_max": 0.0017346014210488647, "clip_ratio/high_mean": 0.0006993096030782908, "clip_ratio/low_mean": 0.000636841887171613, "clip_ratio/low_min": 6.305697024799883e-05, "clip_ratio/region_mean": 0.0013361515011638403, "epoch": 6.848979591836734, "grad_norm": 0.13938280940055847, "learning_rate": 7.5e-07, "loss": -0.022, "step": 667 }, { "clip_ratio/high_max": 0.002252941922051832, "clip_ratio/high_mean": 0.0008800966952549061, "clip_ratio/low_mean": 0.0005893516467949667, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014694483452331042, "epoch": 6.858309037900875, "grad_norm": 0.18164406716823578, "learning_rate": 7.5e-07, "loss": -0.0678, "step": 668 }, { "clip_ratio/high_max": 0.0018807639426086098, "clip_ratio/high_mean": 0.0007167377734731417, "clip_ratio/low_mean": 0.0006782824402762344, "clip_ratio/low_min": 2.135656905011274e-05, "clip_ratio/region_mean": 0.0013950202373962384, "epoch": 6.867638483965014, "grad_norm": 0.1495630443096161, "learning_rate": 7.5e-07, "loss": -0.0226, "step": 669 }, { "clip_ratio/high_max": 0.0022174107216414995, "clip_ratio/high_mean": 0.0008660386411065701, "clip_ratio/low_mean": 0.0006772729302610969, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015433116059284657, "epoch": 6.876967930029155, "grad_norm": 0.14866295456886292, "learning_rate": 7.5e-07, "loss": -0.0339, "step": 670 }, { "clip_ratio/high_max": 0.0018651086757017765, "clip_ratio/high_mean": 0.000703249737398437, "clip_ratio/low_mean": 0.0006512499503514846, "clip_ratio/low_min": 1.4149875823932234e-05, "clip_ratio/region_mean": 0.0013544996800192166, "epoch": 6.886297376093294, "grad_norm": 0.1299654245376587, "learning_rate": 7.5e-07, "loss": -0.0102, "step": 671 }, { "clip_ratio/high_max": 0.0019114840360998642, "clip_ratio/high_mean": 0.0008322006706293905, "clip_ratio/low_mean": 0.0006434455408452777, "clip_ratio/low_min": 1.3133010725141503e-05, "clip_ratio/region_mean": 0.0014756461932847742, "epoch": 6.895626822157435, "grad_norm": 0.1514529436826706, "learning_rate": 7.5e-07, "loss": -0.0385, "step": 672 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.052943638392857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 4058.0, "completions/mean_length": 765.3746948242188, "completions/mean_terminated_length": 579.1814575195312, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 7.0093294460641395, "grad_norm": 0.16375640034675598, "learning_rate": 7.5e-07, "loss": -0.0185, "num_tokens": 402986428.0, "reward": 0.6353934407234192, "reward_std": 0.1652134209871292, "rewards/simpleverify_reward/mean": 0.6353934407234192, "rewards/simpleverify_reward/std": 0.48133644461631775, "step": 673 }, { "clip_ratio/high_max": 0.0017083152451959904, "clip_ratio/high_mean": 0.0006443929405577364, "clip_ratio/low_mean": 0.0005769078134107986, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012213007466925774, "epoch": 7.01865889212828, "grad_norm": 0.16694578528404236, "learning_rate": 7.5e-07, "loss": 0.0154, "step": 674 }, { "clip_ratio/high_max": 0.001891181593236979, "clip_ratio/high_mean": 0.0006942209729459137, "clip_ratio/low_mean": 0.0005020902717660647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011963112447119784, "epoch": 7.0279883381924195, "grad_norm": 0.14960572123527527, "learning_rate": 7.5e-07, "loss": -0.0155, "step": 675 }, { "clip_ratio/high_max": 0.0017296887090196833, "clip_ratio/high_mean": 0.0007441574816766661, "clip_ratio/low_mean": 0.0005432257698885223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012873832638433669, "epoch": 7.03731778425656, "grad_norm": 0.1398526132106781, "learning_rate": 7.5e-07, "loss": -0.022, "step": 676 }, { "clip_ratio/high_max": 0.00205533517873846, "clip_ratio/high_mean": 0.0008148067445290508, "clip_ratio/low_mean": 0.0005547845557885012, "clip_ratio/low_min": 6.973042036406696e-05, "clip_ratio/region_mean": 0.0013695913075935096, "epoch": 7.0466472303206995, "grad_norm": 0.15198364853858948, "learning_rate": 7.5e-07, "loss": -0.051, "step": 677 }, { "clip_ratio/high_max": 0.001663786493736552, "clip_ratio/high_mean": 0.0006342320593830664, "clip_ratio/low_mean": 0.0006293951482803095, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012636271894734818, "epoch": 7.05597667638484, "grad_norm": 0.15587592124938965, "learning_rate": 7.5e-07, "loss": 0.0091, "step": 678 }, { "clip_ratio/high_max": 0.0018258775162394159, "clip_ratio/high_mean": 0.0007446961608366109, "clip_ratio/low_mean": 0.0005999986988172168, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013446948687487748, "epoch": 7.0653061224489795, "grad_norm": 0.15402409434318542, "learning_rate": 7.5e-07, "loss": -0.0236, "step": 679 }, { "clip_ratio/high_max": 0.0018527431275288109, "clip_ratio/high_mean": 0.0007503867054765578, "clip_ratio/low_mean": 0.0005766164267697604, "clip_ratio/low_min": 2.1258503693388775e-05, "clip_ratio/region_mean": 0.0013270031340653077, "epoch": 7.07463556851312, "grad_norm": 0.16684487462043762, "learning_rate": 7.5e-07, "loss": -0.0106, "step": 680 }, { "clip_ratio/high_max": 0.002000240550842136, "clip_ratio/high_mean": 0.000764767530199606, "clip_ratio/low_mean": 0.0007455532668245723, "clip_ratio/low_min": 2.7782994948211126e-05, "clip_ratio/region_mean": 0.00151032079156721, "epoch": 7.0839650145772595, "grad_norm": 0.1435156613588333, "learning_rate": 7.5e-07, "loss": -0.0304, "step": 681 }, { "clip_ratio/high_max": 0.0019596883503254503, "clip_ratio/high_mean": 0.0008632284652776434, "clip_ratio/low_mean": 0.0006464695452450542, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015096980205271393, "epoch": 7.093294460641399, "grad_norm": 0.13767409324645996, "learning_rate": 7.5e-07, "loss": -0.0375, "step": 682 }, { "clip_ratio/high_max": 0.0017130150808952749, "clip_ratio/high_mean": 0.0007099612412275746, "clip_ratio/low_mean": 0.0007026775829217513, "clip_ratio/low_min": 2.4621945158287417e-05, "clip_ratio/region_mean": 0.0014126388414297253, "epoch": 7.1026239067055394, "grad_norm": 0.14880412817001343, "learning_rate": 7.5e-07, "loss": -0.0107, "step": 683 }, { "clip_ratio/high_max": 0.0018284792895428836, "clip_ratio/high_mean": 0.0008373051841772394, "clip_ratio/low_mean": 0.0006767833856429206, "clip_ratio/low_min": 5.06470532855019e-05, "clip_ratio/region_mean": 0.0015140885661821812, "epoch": 7.111953352769679, "grad_norm": 0.14368824660778046, "learning_rate": 7.5e-07, "loss": -0.0158, "step": 684 }, { "clip_ratio/high_max": 0.0019341823208378628, "clip_ratio/high_mean": 0.0007642787204531487, "clip_ratio/low_mean": 0.0007518287475249963, "clip_ratio/low_min": 6.446724910347257e-05, "clip_ratio/region_mean": 0.0015161074516072404, "epoch": 7.121282798833819, "grad_norm": 9.7994966506958, "learning_rate": 7.5e-07, "loss": 0.0048, "step": 685 }, { "clip_ratio/high_max": 0.0017872295902634505, "clip_ratio/high_mean": 0.0007939262441141182, "clip_ratio/low_mean": 0.0006774624107492855, "clip_ratio/low_min": 5.1282051572343335e-05, "clip_ratio/region_mean": 0.001471388663048856, "epoch": 7.130612244897959, "grad_norm": 0.15469302237033844, "learning_rate": 7.5e-07, "loss": -0.0516, "step": 686 }, { "clip_ratio/high_max": 0.0022342899355862755, "clip_ratio/high_mean": 0.0008694918869878165, "clip_ratio/low_mean": 0.0006951795603526989, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001564671503729187, "epoch": 7.139941690962099, "grad_norm": 0.1536482870578766, "learning_rate": 7.5e-07, "loss": -0.0068, "step": 687 }, { "clip_ratio/high_max": 0.002016991551499814, "clip_ratio/high_mean": 0.0007548785761173349, "clip_ratio/low_mean": 0.0008442215003015008, "clip_ratio/low_min": 3.502512845443562e-05, "clip_ratio/region_mean": 0.0015991000764188357, "epoch": 7.149271137026239, "grad_norm": 0.15686963498592377, "learning_rate": 7.5e-07, "loss": -0.0467, "step": 688 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0569893973214286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4026.0, "completions/mean_length": 780.4641723632812, "completions/mean_terminated_length": 580.0948486328125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 7.158600583090379, "grad_norm": 0.17618359625339508, "learning_rate": 7.5e-07, "loss": 0.007, "num_tokens": 412245298.0, "reward": 0.6285575032234192, "reward_std": 0.16242355108261108, "rewards/simpleverify_reward/mean": 0.6285575032234192, "rewards/simpleverify_reward/std": 0.48320725560188293, "step": 689 }, { "clip_ratio/high_max": 0.0018734315817710012, "clip_ratio/high_mean": 0.0007336403709814476, "clip_ratio/low_mean": 0.0004939794198435266, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012276197885512374, "epoch": 7.167930029154519, "grad_norm": 0.15015822649002075, "learning_rate": 7.5e-07, "loss": -0.0398, "step": 690 }, { "clip_ratio/high_max": 0.001737969776513637, "clip_ratio/high_mean": 0.0007338206005442771, "clip_ratio/low_mean": 0.0004884250552095182, "clip_ratio/low_min": 1.2555243301903829e-05, "clip_ratio/region_mean": 0.0012222456607560162, "epoch": 7.1772594752186585, "grad_norm": 0.14463281631469727, "learning_rate": 7.5e-07, "loss": -0.0182, "step": 691 }, { "clip_ratio/high_max": 0.0020437708371900953, "clip_ratio/high_mean": 0.0007418441982736113, "clip_ratio/low_mean": 0.0005057129860688292, "clip_ratio/low_min": 1.3703135664400179e-05, "clip_ratio/region_mean": 0.0012475572075345553, "epoch": 7.186588921282799, "grad_norm": 0.1534474641084671, "learning_rate": 7.5e-07, "loss": 0.0003, "step": 692 }, { "clip_ratio/high_max": 0.0016474945405207109, "clip_ratio/high_mean": 0.0006615494130528532, "clip_ratio/low_mean": 0.0005931766118010273, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001254726044862764, "epoch": 7.1959183673469385, "grad_norm": 0.14516086876392365, "learning_rate": 7.5e-07, "loss": 0.0205, "step": 693 }, { "clip_ratio/high_max": 0.0020229612673574593, "clip_ratio/high_mean": 0.0006957830501050921, "clip_ratio/low_mean": 0.0005038106596657599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011995937275059987, "epoch": 7.205247813411079, "grad_norm": 0.14696240425109863, "learning_rate": 7.5e-07, "loss": 0.0123, "step": 694 }, { "clip_ratio/high_max": 0.0019726134778466076, "clip_ratio/high_mean": 0.0008632520839455537, "clip_ratio/low_mean": 0.0004956204411428189, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013588725196314044, "epoch": 7.214577259475218, "grad_norm": 0.14136524498462677, "learning_rate": 7.5e-07, "loss": -0.0471, "step": 695 }, { "clip_ratio/high_max": 0.0019800792724709027, "clip_ratio/high_mean": 0.000775204742240021, "clip_ratio/low_mean": 0.0005424208347903914, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013176255524740554, "epoch": 7.223906705539359, "grad_norm": 0.13511642813682556, "learning_rate": 7.5e-07, "loss": -0.037, "step": 696 }, { "clip_ratio/high_max": 0.0017812010046327487, "clip_ratio/high_mean": 0.0006582618580068811, "clip_ratio/low_mean": 0.0005314594627634506, "clip_ratio/low_min": 1.5883100786595605e-05, "clip_ratio/region_mean": 0.001189721286209533, "epoch": 7.233236151603498, "grad_norm": 0.14615724980831146, "learning_rate": 7.5e-07, "loss": 0.0185, "step": 697 }, { "clip_ratio/high_max": 0.001953574177605333, "clip_ratio/high_mean": 0.000822401335426548, "clip_ratio/low_mean": 0.00048457100524501584, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013069723499938846, "epoch": 7.242565597667639, "grad_norm": 0.14012150466442108, "learning_rate": 7.5e-07, "loss": -0.0497, "step": 698 }, { "clip_ratio/high_max": 0.0020731136683025397, "clip_ratio/high_mean": 0.0008497257513226941, "clip_ratio/low_mean": 0.0005310697542881826, "clip_ratio/low_min": 1.551253444631584e-05, "clip_ratio/region_mean": 0.0013807955183438025, "epoch": 7.251895043731778, "grad_norm": 0.15136414766311646, "learning_rate": 7.5e-07, "loss": -0.0543, "step": 699 }, { "clip_ratio/high_max": 0.0023403381928801537, "clip_ratio/high_mean": 0.0008208424096665112, "clip_ratio/low_mean": 0.0005804902757517993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014013326144777238, "epoch": 7.261224489795918, "grad_norm": 0.17820538580417633, "learning_rate": 7.5e-07, "loss": -0.0313, "step": 700 }, { "clip_ratio/high_max": 0.0017218276334460825, "clip_ratio/high_mean": 0.0006934673947398551, "clip_ratio/low_mean": 0.0007235640405269805, "clip_ratio/low_min": 1.1115062989119906e-05, "clip_ratio/region_mean": 0.001417031446180772, "epoch": 7.270553935860058, "grad_norm": 0.15877608954906464, "learning_rate": 7.5e-07, "loss": 0.0352, "step": 701 }, { "clip_ratio/high_max": 0.0020181173349556047, "clip_ratio/high_mean": 0.0008187802623069729, "clip_ratio/low_mean": 0.0006845509087725077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015033311647130176, "epoch": 7.279883381924198, "grad_norm": 0.1531139612197876, "learning_rate": 7.5e-07, "loss": -0.0289, "step": 702 }, { "clip_ratio/high_max": 0.0021943873216514476, "clip_ratio/high_mean": 0.0008225231449614512, "clip_ratio/low_mean": 0.0006715855433867546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001494108721090015, "epoch": 7.289212827988338, "grad_norm": 0.15595756471157074, "learning_rate": 7.5e-07, "loss": -0.0267, "step": 703 }, { "clip_ratio/high_max": 0.002224099764134735, "clip_ratio/high_mean": 0.0008858158162183827, "clip_ratio/low_mean": 0.0005329153436832712, "clip_ratio/low_min": 8.940065526985563e-06, "clip_ratio/region_mean": 0.001418731171725085, "epoch": 7.298542274052478, "grad_norm": 0.14444921910762787, "learning_rate": 7.5e-07, "loss": -0.0545, "step": 704 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0531529017857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 762.4534912109375, "completions/mean_terminated_length": 575.3190307617188, "completions/min_length": 107.0, "completions/min_terminated_length": 107.0, "epoch": 7.307871720116618, "grad_norm": 0.16082683205604553, "learning_rate": 7.5e-07, "loss": -0.0052, "num_tokens": 421476631.0, "reward": 0.635323703289032, "reward_std": 0.16241860389709473, "rewards/simpleverify_reward/mean": 0.6353236436843872, "rewards/simpleverify_reward/std": 0.48135602474212646, "step": 705 }, { "clip_ratio/high_max": 0.00177922954026144, "clip_ratio/high_mean": 0.0006653426244156435, "clip_ratio/low_mean": 0.0004154925986767921, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010808352380990982, "epoch": 7.317201166180758, "grad_norm": 0.14120681583881378, "learning_rate": 7.5e-07, "loss": -0.0164, "step": 706 }, { "clip_ratio/high_max": 0.0019260492772446014, "clip_ratio/high_mean": 0.0007525825149059528, "clip_ratio/low_mean": 0.0004940942271787208, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012466767548175994, "epoch": 7.326530612244898, "grad_norm": 0.14259853959083557, "learning_rate": 7.5e-07, "loss": -0.0106, "step": 707 }, { "clip_ratio/high_max": 0.00180638357414864, "clip_ratio/high_mean": 0.0007026096454865183, "clip_ratio/low_mean": 0.0004570431337924674, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011596527747315122, "epoch": 7.335860058309038, "grad_norm": 0.19045647978782654, "learning_rate": 7.5e-07, "loss": -0.0087, "step": 708 }, { "clip_ratio/high_max": 0.0018552787914813962, "clip_ratio/high_mean": 0.0006998994449531892, "clip_ratio/low_mean": 0.0005821326863042486, "clip_ratio/low_min": 2.7342105568095576e-05, "clip_ratio/region_mean": 0.0012820321389881428, "epoch": 7.345189504373177, "grad_norm": 0.1780974417924881, "learning_rate": 7.5e-07, "loss": 0.0122, "step": 709 }, { "clip_ratio/high_max": 0.0018093337239406537, "clip_ratio/high_mean": 0.00060743105950678, "clip_ratio/low_mean": 0.0005936190418651677, "clip_ratio/low_min": 1.5504838302149437e-05, "clip_ratio/region_mean": 0.0012010500977339689, "epoch": 7.354518950437318, "grad_norm": 0.19587813317775726, "learning_rate": 7.5e-07, "loss": 0.0634, "step": 710 }, { "clip_ratio/high_max": 0.0017564270328875864, "clip_ratio/high_mean": 0.0007704717199885636, "clip_ratio/low_mean": 0.0004885444341198308, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012590161168191116, "epoch": 7.363848396501457, "grad_norm": 0.1423058956861496, "learning_rate": 7.5e-07, "loss": -0.0299, "step": 711 }, { "clip_ratio/high_max": 0.0020254886912880465, "clip_ratio/high_mean": 0.0008278020632133121, "clip_ratio/low_mean": 0.0005639527589664795, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013917548167228233, "epoch": 7.373177842565598, "grad_norm": 0.14101800322532654, "learning_rate": 7.5e-07, "loss": -0.0265, "step": 712 }, { "clip_ratio/high_max": 0.0019415199421928264, "clip_ratio/high_mean": 0.000778908117354149, "clip_ratio/low_mean": 0.00041596228948037606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001194870397739578, "epoch": 7.382507288629737, "grad_norm": 0.18535375595092773, "learning_rate": 7.5e-07, "loss": -0.0492, "step": 713 }, { "clip_ratio/high_max": 0.0018777611257974058, "clip_ratio/high_mean": 0.0007497518872696673, "clip_ratio/low_mean": 0.0005861604295205325, "clip_ratio/low_min": 1.6391293684137054e-05, "clip_ratio/region_mean": 0.0013359123113332316, "epoch": 7.391836734693878, "grad_norm": 0.13208149373531342, "learning_rate": 7.5e-07, "loss": -0.0237, "step": 714 }, { "clip_ratio/high_max": 0.002036265064816689, "clip_ratio/high_mean": 0.0007922372287794133, "clip_ratio/low_mean": 0.0006292807356658159, "clip_ratio/low_min": 4.110490044695325e-05, "clip_ratio/region_mean": 0.0014215179871825967, "epoch": 7.401166180758017, "grad_norm": 0.13866929709911346, "learning_rate": 7.5e-07, "loss": -0.051, "step": 715 }, { "clip_ratio/high_max": 0.001988333486224292, "clip_ratio/high_mean": 0.0007550418304163031, "clip_ratio/low_mean": 0.0006623770577789401, "clip_ratio/low_min": 7.259140056703473e-05, "clip_ratio/region_mean": 0.0014174188836477697, "epoch": 7.410495626822158, "grad_norm": 0.19525648653507233, "learning_rate": 7.5e-07, "loss": -0.0278, "step": 716 }, { "clip_ratio/high_max": 0.0016699040861567482, "clip_ratio/high_mean": 0.0006739438558724942, "clip_ratio/low_mean": 0.0006032748206052929, "clip_ratio/low_min": 1.4824478057562374e-05, "clip_ratio/region_mean": 0.0012772186528309248, "epoch": 7.419825072886297, "grad_norm": 0.12868088483810425, "learning_rate": 7.5e-07, "loss": -0.0192, "step": 717 }, { "clip_ratio/high_max": 0.0018789810928865336, "clip_ratio/high_mean": 0.0007677504563616822, "clip_ratio/low_mean": 0.0006087646925152512, "clip_ratio/low_min": 1.6037978639360517e-05, "clip_ratio/region_mean": 0.0013765151597908698, "epoch": 7.429154518950437, "grad_norm": 0.1436985731124878, "learning_rate": 7.5e-07, "loss": -0.0075, "step": 718 }, { "clip_ratio/high_max": 0.0019554107493604533, "clip_ratio/high_mean": 0.0007810673287167447, "clip_ratio/low_mean": 0.0007209363102447242, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015020036298665218, "epoch": 7.438483965014577, "grad_norm": 0.14344625174999237, "learning_rate": 7.5e-07, "loss": 0.004, "step": 719 }, { "clip_ratio/high_max": 0.001938105298904702, "clip_ratio/high_mean": 0.0007839665267965756, "clip_ratio/low_mean": 0.0005927293104832643, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001376695803628536, "epoch": 7.447813411078717, "grad_norm": 0.15025757253170013, "learning_rate": 7.5e-07, "loss": -0.0457, "step": 720 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0567103794642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 797.709228515625, "completions/mean_terminated_length": 599.4166259765625, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 7.457142857142857, "grad_norm": 0.16559194028377533, "learning_rate": 7.5e-07, "loss": -0.0415, "num_tokens": 431003110.0, "reward": 0.6295340657234192, "reward_std": 0.16759903728961945, "rewards/simpleverify_reward/mean": 0.6295340657234192, "rewards/simpleverify_reward/std": 0.48294639587402344, "step": 721 }, { "clip_ratio/high_max": 0.001704756636172533, "clip_ratio/high_mean": 0.0006873776983411517, "clip_ratio/low_mean": 0.0004975732945240452, "clip_ratio/low_min": 2.2049744075047784e-05, "clip_ratio/region_mean": 0.0011849510083266068, "epoch": 7.466472303206997, "grad_norm": 0.1656133085489273, "learning_rate": 7.5e-07, "loss": 0.0029, "step": 722 }, { "clip_ratio/high_max": 0.0016375585582864005, "clip_ratio/high_mean": 0.0007106672164809424, "clip_ratio/low_mean": 0.0005246617083685123, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001235328945767833, "epoch": 7.475801749271137, "grad_norm": 0.150642529129982, "learning_rate": 7.5e-07, "loss": -0.0071, "step": 723 }, { "clip_ratio/high_max": 0.0018736306628852617, "clip_ratio/high_mean": 0.0007382205658359453, "clip_ratio/low_mean": 0.00042551847582217306, "clip_ratio/low_min": 3.083562205574708e-05, "clip_ratio/region_mean": 0.0011637390525720548, "epoch": 7.485131195335277, "grad_norm": 0.15628425776958466, "learning_rate": 7.5e-07, "loss": -0.0447, "step": 724 }, { "clip_ratio/high_max": 0.002185415716667194, "clip_ratio/high_mean": 0.0009036489173013251, "clip_ratio/low_mean": 0.0005142241798239411, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014178730816638563, "epoch": 7.494460641399417, "grad_norm": 0.15522664785385132, "learning_rate": 7.5e-07, "loss": -0.0159, "step": 725 }, { "clip_ratio/high_max": 0.0020591233915183693, "clip_ratio/high_mean": 0.000779803409386659, "clip_ratio/low_mean": 0.00054254572296486, "clip_ratio/low_min": 2.1402644051704556e-05, "clip_ratio/region_mean": 0.0013223491405369714, "epoch": 7.503790087463557, "grad_norm": 0.14517955482006073, "learning_rate": 7.5e-07, "loss": -0.0446, "step": 726 }, { "clip_ratio/high_max": 0.0019552375524654053, "clip_ratio/high_mean": 0.0007647404909221223, "clip_ratio/low_mean": 0.0005286236273605027, "clip_ratio/low_min": 3.571428533177823e-05, "clip_ratio/region_mean": 0.0012933641592098866, "epoch": 7.513119533527696, "grad_norm": 0.14664947986602783, "learning_rate": 7.5e-07, "loss": -0.0259, "step": 727 }, { "clip_ratio/high_max": 0.001746623220242327, "clip_ratio/high_mean": 0.0006675476688542403, "clip_ratio/low_mean": 0.000709929541699239, "clip_ratio/low_min": 4.4170781620778143e-05, "clip_ratio/region_mean": 0.001377477245114278, "epoch": 7.522448979591837, "grad_norm": 0.14990726113319397, "learning_rate": 7.5e-07, "loss": 0.0614, "step": 728 }, { "clip_ratio/high_max": 0.0018837425814126618, "clip_ratio/high_mean": 0.0007174470720201498, "clip_ratio/low_mean": 0.0005649453132718918, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012823923862015363, "epoch": 7.531778425655976, "grad_norm": 0.1455548107624054, "learning_rate": 7.5e-07, "loss": -0.0193, "step": 729 }, { "clip_ratio/high_max": 0.0018609251746966038, "clip_ratio/high_mean": 0.0007084952594595961, "clip_ratio/low_mean": 0.0006627828934142599, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013712781401409302, "epoch": 7.541107871720117, "grad_norm": 0.15016871690750122, "learning_rate": 7.5e-07, "loss": 0.0246, "step": 730 }, { "clip_ratio/high_max": 0.001903753996884916, "clip_ratio/high_mean": 0.0008132198108796729, "clip_ratio/low_mean": 0.0005660908536810894, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013793106627417728, "epoch": 7.550437317784256, "grad_norm": 0.14749141037464142, "learning_rate": 7.5e-07, "loss": -0.0392, "step": 731 }, { "clip_ratio/high_max": 0.002173381028114818, "clip_ratio/high_mean": 0.0008399735361308558, "clip_ratio/low_mean": 0.000623801935944357, "clip_ratio/low_min": 2.7873677026946098e-05, "clip_ratio/region_mean": 0.0014637754793511704, "epoch": 7.559766763848397, "grad_norm": 0.15868736803531647, "learning_rate": 7.5e-07, "loss": -0.027, "step": 732 }, { "clip_ratio/high_max": 0.0018633372928889003, "clip_ratio/high_mean": 0.0007369629693130264, "clip_ratio/low_mean": 0.0006802293792134151, "clip_ratio/low_min": 1.1493196325318422e-05, "clip_ratio/region_mean": 0.0014171923794492614, "epoch": 7.569096209912536, "grad_norm": 0.13641317188739777, "learning_rate": 7.5e-07, "loss": -0.0082, "step": 733 }, { "clip_ratio/high_max": 0.0023375456876237877, "clip_ratio/high_mean": 0.0009331368873972679, "clip_ratio/low_mean": 0.0005287312733344152, "clip_ratio/low_min": 1.4437514437304344e-05, "clip_ratio/region_mean": 0.001461868167098146, "epoch": 7.578425655976677, "grad_norm": 0.15333192050457, "learning_rate": 7.5e-07, "loss": -0.0596, "step": 734 }, { "clip_ratio/high_max": 0.001837596260884311, "clip_ratio/high_mean": 0.00080898556370812, "clip_ratio/low_mean": 0.0007163467471400509, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015253322962962557, "epoch": 7.587755102040816, "grad_norm": 0.15254798531532288, "learning_rate": 7.5e-07, "loss": -0.016, "step": 735 }, { "clip_ratio/high_max": 0.0018546779720054474, "clip_ratio/high_mean": 0.0007660824485355988, "clip_ratio/low_mean": 0.0008212237426050706, "clip_ratio/low_min": 7.618250492669176e-05, "clip_ratio/region_mean": 0.00158730618932168, "epoch": 7.597084548104956, "grad_norm": 0.14929336309432983, "learning_rate": 7.5e-07, "loss": -0.0267, "step": 736 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.053989955357142905, "completions/max_length": 4096.0, "completions/max_terminated_length": 4079.0, "completions/mean_length": 766.4072875976562, "completions/mean_terminated_length": 576.3832397460938, "completions/min_length": 87.0, "completions/min_terminated_length": 87.0, "epoch": 7.606413994169096, "grad_norm": 0.17715825140476227, "learning_rate": 7.5e-07, "loss": -0.0259, "num_tokens": 440254156.0, "reward": 0.6339285969734192, "reward_std": 0.15910275280475616, "rewards/simpleverify_reward/mean": 0.6339285969734192, "rewards/simpleverify_reward/std": 0.481746107339859, "step": 737 }, { "clip_ratio/high_max": 0.001614579487068113, "clip_ratio/high_mean": 0.000590638968787971, "clip_ratio/low_mean": 0.0005109970074954617, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011016359749191906, "epoch": 7.615743440233236, "grad_norm": 0.1434967815876007, "learning_rate": 7.5e-07, "loss": -0.0215, "step": 738 }, { "clip_ratio/high_max": 0.0016930880628933664, "clip_ratio/high_mean": 0.0006966397777432576, "clip_ratio/low_mean": 0.0004709291283688799, "clip_ratio/low_min": 1.668932327447692e-05, "clip_ratio/region_mean": 0.0011675689056573901, "epoch": 7.625072886297376, "grad_norm": 0.21499468386173248, "learning_rate": 7.5e-07, "loss": -0.0582, "step": 739 }, { "clip_ratio/high_max": 0.001636965776924626, "clip_ratio/high_mean": 0.0006717963060509646, "clip_ratio/low_mean": 0.0006152405221655499, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001287036830035504, "epoch": 7.634402332361516, "grad_norm": 0.17928476631641388, "learning_rate": 7.5e-07, "loss": -0.0171, "step": 740 }, { "clip_ratio/high_max": 0.0018700750297284685, "clip_ratio/high_mean": 0.0007504797631554538, "clip_ratio/low_mean": 0.0005158741078048479, "clip_ratio/low_min": 1.4730143448105082e-05, "clip_ratio/region_mean": 0.001266353894607164, "epoch": 7.643731778425656, "grad_norm": 0.1491251438856125, "learning_rate": 7.5e-07, "loss": -0.0277, "step": 741 }, { "clip_ratio/high_max": 0.0016168165020644665, "clip_ratio/high_mean": 0.0006179574429552304, "clip_ratio/low_mean": 0.0005419353292381857, "clip_ratio/low_min": 2.4220458726631477e-05, "clip_ratio/region_mean": 0.0011598927667364478, "epoch": 7.653061224489796, "grad_norm": 0.1423831582069397, "learning_rate": 7.5e-07, "loss": 0.0072, "step": 742 }, { "clip_ratio/high_max": 0.001553223490191158, "clip_ratio/high_mean": 0.0006167005703900941, "clip_ratio/low_mean": 0.0005660462848027237, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001182746829726966, "epoch": 7.662390670553936, "grad_norm": 0.14778827130794525, "learning_rate": 7.5e-07, "loss": -0.019, "step": 743 }, { "clip_ratio/high_max": 0.0017078005002986174, "clip_ratio/high_mean": 0.0006650638561040978, "clip_ratio/low_mean": 0.0004538110733847134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011188749631401151, "epoch": 7.671720116618076, "grad_norm": 0.15038929879665375, "learning_rate": 7.5e-07, "loss": -0.0647, "step": 744 }, { "clip_ratio/high_max": 0.0024271321381093003, "clip_ratio/high_mean": 0.0008930733747547492, "clip_ratio/low_mean": 0.000570828044146765, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014639014370914083, "epoch": 7.681049562682215, "grad_norm": 0.1501818597316742, "learning_rate": 7.5e-07, "loss": -0.0282, "step": 745 }, { "clip_ratio/high_max": 0.0016337671295332257, "clip_ratio/high_mean": 0.0006804577742514084, "clip_ratio/low_mean": 0.0008456128416582942, "clip_ratio/low_min": 3.223401108698454e-05, "clip_ratio/region_mean": 0.0015260706095432397, "epoch": 7.690379008746356, "grad_norm": 0.17827773094177246, "learning_rate": 7.5e-07, "loss": 0.0206, "step": 746 }, { "clip_ratio/high_max": 0.0019554562895791605, "clip_ratio/high_mean": 0.0007792215510562528, "clip_ratio/low_mean": 0.0006119230092735961, "clip_ratio/low_min": 1.4029180420038756e-05, "clip_ratio/region_mean": 0.0013911445494159125, "epoch": 7.699708454810495, "grad_norm": 0.15351438522338867, "learning_rate": 7.5e-07, "loss": -0.0081, "step": 747 }, { "clip_ratio/high_max": 0.001948348231962882, "clip_ratio/high_mean": 0.0008220163090300048, "clip_ratio/low_mean": 0.00064259464488714, "clip_ratio/low_min": 2.464272347424412e-05, "clip_ratio/region_mean": 0.001464610922994325, "epoch": 7.709037900874636, "grad_norm": 0.1654413342475891, "learning_rate": 7.5e-07, "loss": -0.0018, "step": 748 }, { "clip_ratio/high_max": 0.0020112146357860183, "clip_ratio/high_mean": 0.0007201080679806182, "clip_ratio/low_mean": 0.0006797881142119877, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013998961621837225, "epoch": 7.718367346938775, "grad_norm": 0.14220020174980164, "learning_rate": 7.5e-07, "loss": -0.0181, "step": 749 }, { "clip_ratio/high_max": 0.0018569256535556633, "clip_ratio/high_mean": 0.0007327942266783793, "clip_ratio/low_mean": 0.0006818052952439757, "clip_ratio/low_min": 2.0345052689663135e-05, "clip_ratio/region_mean": 0.0014145995119179133, "epoch": 7.727696793002916, "grad_norm": 0.16031725704669952, "learning_rate": 7.5e-07, "loss": -0.0015, "step": 750 }, { "clip_ratio/high_max": 0.0019113019698124845, "clip_ratio/high_mean": 0.0007803648786648409, "clip_ratio/low_mean": 0.0007747162399027729, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001555081114929635, "epoch": 7.737026239067055, "grad_norm": 0.1839870810508728, "learning_rate": 7.5e-07, "loss": -0.0019, "step": 751 }, { "clip_ratio/high_max": 0.002537401029258035, "clip_ratio/high_mean": 0.000934531675738981, "clip_ratio/low_mean": 0.0007420130905302358, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016765447726356797, "epoch": 7.746355685131196, "grad_norm": 0.13780714571475983, "learning_rate": 7.5e-07, "loss": -0.033, "step": 752 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.051967075892857095, "completions/max_length": 4096.0, "completions/max_terminated_length": 3988.0, "completions/mean_length": 751.0382080078125, "completions/mean_terminated_length": 567.6817626953125, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 7.755685131195335, "grad_norm": 0.17398518323898315, "learning_rate": 7.5e-07, "loss": -0.001, "num_tokens": 449414063.0, "reward": 0.64453125, "reward_std": 0.1460321992635727, "rewards/simpleverify_reward/mean": 0.64453125, "rewards/simpleverify_reward/std": 0.4786717891693115, "step": 753 }, { "clip_ratio/high_max": 0.0018104156260960735, "clip_ratio/high_mean": 0.0007695699496252928, "clip_ratio/low_mean": 0.0004762973403558135, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012458672717912123, "epoch": 7.765014577259475, "grad_norm": 0.15931761264801025, "learning_rate": 7.5e-07, "loss": -0.0655, "step": 754 }, { "clip_ratio/high_max": 0.0020879170479020104, "clip_ratio/high_mean": 0.0007365204892266775, "clip_ratio/low_mean": 0.000434937353929854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011714578213286586, "epoch": 7.774344023323615, "grad_norm": 0.14202260971069336, "learning_rate": 7.5e-07, "loss": -0.0271, "step": 755 }, { "clip_ratio/high_max": 0.0019057986282859929, "clip_ratio/high_mean": 0.0007716069694652106, "clip_ratio/low_mean": 0.0004166129896248094, "clip_ratio/low_min": 1.716090082481969e-05, "clip_ratio/region_mean": 0.0011882199687534012, "epoch": 7.783673469387755, "grad_norm": 0.14127841591835022, "learning_rate": 7.5e-07, "loss": -0.034, "step": 756 }, { "clip_ratio/high_max": 0.00171403176500462, "clip_ratio/high_mean": 0.0006302779620455112, "clip_ratio/low_mean": 0.000498042308663571, "clip_ratio/low_min": 1.300457734032534e-05, "clip_ratio/region_mean": 0.0011283202529739356, "epoch": 7.793002915451895, "grad_norm": 0.14400330185890198, "learning_rate": 7.5e-07, "loss": -0.0431, "step": 757 }, { "clip_ratio/high_max": 0.0018229440538561903, "clip_ratio/high_mean": 0.0006566461161128245, "clip_ratio/low_mean": 0.0005162627085155691, "clip_ratio/low_min": 1.1020983947673813e-05, "clip_ratio/region_mean": 0.0011729088037100155, "epoch": 7.802332361516035, "grad_norm": 0.14935310184955597, "learning_rate": 7.5e-07, "loss": 0.0049, "step": 758 }, { "clip_ratio/high_max": 0.002010187912674155, "clip_ratio/high_mean": 0.000676978603223688, "clip_ratio/low_mean": 0.00045649609546671854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011334746741340496, "epoch": 7.811661807580175, "grad_norm": 0.15381397306919098, "learning_rate": 7.5e-07, "loss": -0.032, "step": 759 }, { "clip_ratio/high_max": 0.0019156305497745052, "clip_ratio/high_mean": 0.0007434338549501263, "clip_ratio/low_mean": 0.00045166136260377243, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011950952211918775, "epoch": 7.820991253644315, "grad_norm": 0.14693649113178253, "learning_rate": 7.5e-07, "loss": -0.0238, "step": 760 }, { "clip_ratio/high_max": 0.001549379278003471, "clip_ratio/high_mean": 0.0006427169214475725, "clip_ratio/low_mean": 0.0004984645961485512, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011411815539759118, "epoch": 7.830320699708455, "grad_norm": 0.13279041647911072, "learning_rate": 7.5e-07, "loss": -0.0005, "step": 761 }, { "clip_ratio/high_max": 0.001737942460749764, "clip_ratio/high_mean": 0.0006784697052353295, "clip_ratio/low_mean": 0.0005636318619508529, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012421015708241612, "epoch": 7.839650145772595, "grad_norm": 0.1417316198348999, "learning_rate": 7.5e-07, "loss": -0.0133, "step": 762 }, { "clip_ratio/high_max": 0.001817911343096057, "clip_ratio/high_mean": 0.0007369960967480438, "clip_ratio/low_mean": 0.0004927475456497632, "clip_ratio/low_min": 1.0548523277975619e-05, "clip_ratio/region_mean": 0.0012297436296648812, "epoch": 7.848979591836734, "grad_norm": 0.20056122541427612, "learning_rate": 7.5e-07, "loss": -0.0214, "step": 763 }, { "clip_ratio/high_max": 0.0020768209360539913, "clip_ratio/high_mean": 0.0007816545930836583, "clip_ratio/low_mean": 0.0005607932253042236, "clip_ratio/low_min": 2.49351687671151e-05, "clip_ratio/region_mean": 0.0013424478274828289, "epoch": 7.858309037900875, "grad_norm": 0.14772675931453705, "learning_rate": 7.5e-07, "loss": -0.0467, "step": 764 }, { "clip_ratio/high_max": 0.0018844616351998411, "clip_ratio/high_mean": 0.0007595038168801693, "clip_ratio/low_mean": 0.0005605583000942715, "clip_ratio/low_min": 5.0010894483421e-05, "clip_ratio/region_mean": 0.0013200621506257448, "epoch": 7.867638483965014, "grad_norm": 0.13467906415462494, "learning_rate": 7.5e-07, "loss": -0.0037, "step": 765 }, { "clip_ratio/high_max": 0.002230770347523503, "clip_ratio/high_mean": 0.0008236243193096016, "clip_ratio/low_mean": 0.0005840859485033434, "clip_ratio/low_min": 1.0240864867228083e-05, "clip_ratio/region_mean": 0.0014077102532610297, "epoch": 7.876967930029155, "grad_norm": 0.14033855497837067, "learning_rate": 7.5e-07, "loss": 0.0071, "step": 766 }, { "clip_ratio/high_max": 0.001689575186901493, "clip_ratio/high_mean": 0.0006765262332919519, "clip_ratio/low_mean": 0.0005827221407344041, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012592483508342411, "epoch": 7.886297376093294, "grad_norm": 0.1747748851776123, "learning_rate": 7.5e-07, "loss": -0.0267, "step": 767 }, { "clip_ratio/high_max": 0.0017525040639156941, "clip_ratio/high_mean": 0.0006861169604235329, "clip_ratio/low_mean": 0.0007035520684439689, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001389669043419417, "epoch": 7.895626822157435, "grad_norm": 0.18817906081676483, "learning_rate": 7.5e-07, "loss": 0.0289, "step": 768 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0625697544642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 802.5285034179688, "completions/mean_terminated_length": 582.7023315429688, "completions/min_length": 121.0, "completions/min_terminated_length": 121.0, "epoch": 8.00932944606414, "grad_norm": 0.17451567947864532, "learning_rate": 7.5e-07, "loss": -0.0306, "num_tokens": 458677959.0, "reward": 0.6341378688812256, "reward_std": 0.15605799853801727, "rewards/simpleverify_reward/mean": 0.6341378092765808, "rewards/simpleverify_reward/std": 0.48168787360191345, "step": 769 }, { "clip_ratio/high_max": 0.0015648513654014096, "clip_ratio/high_mean": 0.0006440864053729456, "clip_ratio/low_mean": 0.00044257842819206417, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001086664811737137, "epoch": 8.018658892128279, "grad_norm": 0.13310597836971283, "learning_rate": 7.5e-07, "loss": -0.0103, "step": 770 }, { "clip_ratio/high_max": 0.0017713887646095827, "clip_ratio/high_mean": 0.0007171302759161335, "clip_ratio/low_mean": 0.000468477327558503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011856076198455412, "epoch": 8.02798833819242, "grad_norm": 0.15243253111839294, "learning_rate": 7.5e-07, "loss": -0.0298, "step": 771 }, { "clip_ratio/high_max": 0.0019838565713143907, "clip_ratio/high_mean": 0.0007644477464054944, "clip_ratio/low_mean": 0.0004785212613569456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012429689995769877, "epoch": 8.03731778425656, "grad_norm": 0.14485971629619598, "learning_rate": 7.5e-07, "loss": -0.0596, "step": 772 }, { "clip_ratio/high_max": 0.0017684130907582585, "clip_ratio/high_mean": 0.0006038401224941481, "clip_ratio/low_mean": 0.00038379503712349106, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009876351505226921, "epoch": 8.0466472303207, "grad_norm": 0.1470165252685547, "learning_rate": 7.5e-07, "loss": -0.0181, "step": 773 }, { "clip_ratio/high_max": 0.0018451346186338924, "clip_ratio/high_mean": 0.0007216728772618808, "clip_ratio/low_mean": 0.0005221866103966022, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001243859478563536, "epoch": 8.055976676384839, "grad_norm": 0.1546369343996048, "learning_rate": 7.5e-07, "loss": -0.0119, "step": 774 }, { "clip_ratio/high_max": 0.0016357618114852812, "clip_ratio/high_mean": 0.0007382598960248288, "clip_ratio/low_mean": 0.0005440925425546084, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012823524702980649, "epoch": 8.06530612244898, "grad_norm": 0.15702003240585327, "learning_rate": 7.5e-07, "loss": -0.0321, "step": 775 }, { "clip_ratio/high_max": 0.0017328190770058427, "clip_ratio/high_mean": 0.0006745382988810888, "clip_ratio/low_mean": 0.000586364478294854, "clip_ratio/low_min": 9.300595593231265e-06, "clip_ratio/region_mean": 0.0012609027580765542, "epoch": 8.07463556851312, "grad_norm": 0.14636313915252686, "learning_rate": 7.5e-07, "loss": -0.0303, "step": 776 }, { "clip_ratio/high_max": 0.001797206172341248, "clip_ratio/high_mean": 0.0006664671136604738, "clip_ratio/low_mean": 0.0006327585015242221, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012992255979042966, "epoch": 8.08396501457726, "grad_norm": 0.2030400186777115, "learning_rate": 7.5e-07, "loss": 0.0004, "step": 777 }, { "clip_ratio/high_max": 0.0018739380102488212, "clip_ratio/high_mean": 0.0006462712535721948, "clip_ratio/low_mean": 0.0005620669035124592, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012083381443517283, "epoch": 8.093294460641399, "grad_norm": 0.14616705477237701, "learning_rate": 7.5e-07, "loss": 0.01, "step": 778 }, { "clip_ratio/high_max": 0.002301622516824864, "clip_ratio/high_mean": 0.0008078222708718386, "clip_ratio/low_mean": 0.000529087213180901, "clip_ratio/low_min": 2.551020406826865e-05, "clip_ratio/region_mean": 0.0013369094667723402, "epoch": 8.102623906705539, "grad_norm": 0.19839352369308472, "learning_rate": 7.5e-07, "loss": -0.0275, "step": 779 }, { "clip_ratio/high_max": 0.001517340999271255, "clip_ratio/high_mean": 0.0006670945113000926, "clip_ratio/low_mean": 0.0006441478701617598, "clip_ratio/low_min": 3.888204082613811e-05, "clip_ratio/region_mean": 0.0013112423839629628, "epoch": 8.11195335276968, "grad_norm": 2.4264001846313477, "learning_rate": 7.5e-07, "loss": -0.0284, "step": 780 }, { "clip_ratio/high_max": 0.0017626602730160812, "clip_ratio/high_mean": 0.0007068235381666454, "clip_ratio/low_mean": 0.0007253620060510002, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014321855196612887, "epoch": 8.12128279883382, "grad_norm": 0.1815481185913086, "learning_rate": 7.5e-07, "loss": 0.0059, "step": 781 }, { "clip_ratio/high_max": 0.0023720296485407744, "clip_ratio/high_mean": 0.0008491657372360351, "clip_ratio/low_mean": 0.0005541971695492975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014033629486220889, "epoch": 8.130612244897959, "grad_norm": 0.2260289490222931, "learning_rate": 7.5e-07, "loss": -0.047, "step": 782 }, { "clip_ratio/high_max": 0.0016023914649849758, "clip_ratio/high_mean": 0.0006392403174686478, "clip_ratio/low_mean": 0.000604383552854415, "clip_ratio/low_min": 1.569563028169796e-05, "clip_ratio/region_mean": 0.0012436238721420523, "epoch": 8.139941690962099, "grad_norm": 0.15404430031776428, "learning_rate": 7.5e-07, "loss": -0.0132, "step": 783 }, { "clip_ratio/high_max": 0.00212355478288373, "clip_ratio/high_mean": 0.0008419377154496033, "clip_ratio/low_mean": 0.0006796614925406175, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015215992571029346, "epoch": 8.14927113702624, "grad_norm": 0.17326971888542175, "learning_rate": 7.5e-07, "loss": -0.0295, "step": 784 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0599888392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3968.0, "completions/mean_length": 786.262939453125, "completions/mean_terminated_length": 575.0448608398438, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 8.15860058309038, "grad_norm": 0.1684148609638214, "learning_rate": 7.5e-07, "loss": -0.0129, "num_tokens": 467894336.0, "reward": 0.6417410969734192, "reward_std": 0.1536712348461151, "rewards/simpleverify_reward/mean": 0.6417410969734192, "rewards/simpleverify_reward/std": 0.4795054495334625, "step": 785 }, { "clip_ratio/high_max": 0.0019447706217761151, "clip_ratio/high_mean": 0.000765609282098012, "clip_ratio/low_mean": 0.0004662471715164429, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012318564586166758, "epoch": 8.167930029154519, "grad_norm": 0.17987744510173798, "learning_rate": 7.5e-07, "loss": -0.0178, "step": 786 }, { "clip_ratio/high_max": 0.001699012082099216, "clip_ratio/high_mean": 0.0006419972887670156, "clip_ratio/low_mean": 0.0004877512883467716, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011297485689283349, "epoch": 8.177259475218658, "grad_norm": 0.8651576638221741, "learning_rate": 7.5e-07, "loss": 0.0181, "step": 787 }, { "clip_ratio/high_max": 0.0016650943653075956, "clip_ratio/high_mean": 0.0006624285524594598, "clip_ratio/low_mean": 0.0004203844023322745, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010828129707078915, "epoch": 8.186588921282798, "grad_norm": 0.14004600048065186, "learning_rate": 7.5e-07, "loss": -0.0456, "step": 788 }, { "clip_ratio/high_max": 0.0017802742913772818, "clip_ratio/high_mean": 0.000626418490355718, "clip_ratio/low_mean": 0.0004407494807310286, "clip_ratio/low_min": 2.18990880966885e-05, "clip_ratio/region_mean": 0.0010671679956431035, "epoch": 8.19591836734694, "grad_norm": 0.15608137845993042, "learning_rate": 7.5e-07, "loss": -0.0235, "step": 789 }, { "clip_ratio/high_max": 0.0014472541406576056, "clip_ratio/high_mean": 0.0005924658325966448, "clip_ratio/low_mean": 0.0005715928564313799, "clip_ratio/low_min": 1.3403388038568664e-05, "clip_ratio/region_mean": 0.0011640586744761094, "epoch": 8.205247813411079, "grad_norm": 0.13819986581802368, "learning_rate": 7.5e-07, "loss": -0.0186, "step": 790 }, { "clip_ratio/high_max": 0.0019574796315282583, "clip_ratio/high_mean": 0.0007486705035262275, "clip_ratio/low_mean": 0.0004959335137755261, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001244603994564386, "epoch": 8.214577259475218, "grad_norm": 0.43259745836257935, "learning_rate": 7.5e-07, "loss": -0.0256, "step": 791 }, { "clip_ratio/high_max": 0.0017514266110083554, "clip_ratio/high_mean": 0.0007337903734878637, "clip_ratio/low_mean": 0.0005690697635145625, "clip_ratio/low_min": 3.8926089473534375e-05, "clip_ratio/region_mean": 0.0013028601279074792, "epoch": 8.223906705539358, "grad_norm": 0.14517711102962494, "learning_rate": 7.5e-07, "loss": -0.0375, "step": 792 }, { "clip_ratio/high_max": 0.0021276640181895345, "clip_ratio/high_mean": 0.0008273907278635306, "clip_ratio/low_mean": 0.0005789343076685327, "clip_ratio/low_min": 1.2583048373926431e-05, "clip_ratio/region_mean": 0.001406325027346611, "epoch": 8.2332361516035, "grad_norm": 0.14928658306598663, "learning_rate": 7.5e-07, "loss": -0.0273, "step": 793 }, { "clip_ratio/high_max": 0.00195116595205036, "clip_ratio/high_mean": 0.0007684267075092066, "clip_ratio/low_mean": 0.0005939095026405994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001362336210149806, "epoch": 8.242565597667639, "grad_norm": 0.15543316304683685, "learning_rate": 7.5e-07, "loss": -0.024, "step": 794 }, { "clip_ratio/high_max": 0.0019697198513313197, "clip_ratio/high_mean": 0.0008164256341842702, "clip_ratio/low_mean": 0.000611602625440355, "clip_ratio/low_min": 6.608116927964147e-05, "clip_ratio/region_mean": 0.0014280282448453363, "epoch": 8.251895043731778, "grad_norm": 0.1902627795934677, "learning_rate": 7.5e-07, "loss": -0.018, "step": 795 }, { "clip_ratio/high_max": 0.0019133175519527867, "clip_ratio/high_mean": 0.0007679554782953346, "clip_ratio/low_mean": 0.0005890623569939635, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013570178125519305, "epoch": 8.261224489795918, "grad_norm": 0.2150283306837082, "learning_rate": 7.5e-07, "loss": -0.0554, "step": 796 }, { "clip_ratio/high_max": 0.0018442540713294875, "clip_ratio/high_mean": 0.000706298013938067, "clip_ratio/low_mean": 0.0006965526672502165, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001402850688464241, "epoch": 8.270553935860057, "grad_norm": 0.25014835596084595, "learning_rate": 7.5e-07, "loss": 0.0343, "step": 797 }, { "clip_ratio/high_max": 0.002031430591159733, "clip_ratio/high_mean": 0.0007468760504707461, "clip_ratio/low_mean": 0.0006659616319666384, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014128376824373845, "epoch": 8.279883381924199, "grad_norm": 0.16120575368404388, "learning_rate": 7.5e-07, "loss": -0.0162, "step": 798 }, { "clip_ratio/high_max": 0.00224413386604283, "clip_ratio/high_mean": 0.0009507588365522679, "clip_ratio/low_mean": 0.0005185753498153645, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014693342127429787, "epoch": 8.289212827988338, "grad_norm": 0.15893661975860596, "learning_rate": 7.5e-07, "loss": -0.0729, "step": 799 }, { "clip_ratio/high_max": 0.0020159643136139493, "clip_ratio/high_mean": 0.0008079562785496819, "clip_ratio/low_mean": 0.0006116634049249114, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014196196716511622, "epoch": 8.298542274052478, "grad_norm": 0.15004783868789673, "learning_rate": 7.5e-07, "loss": -0.0394, "step": 800 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0601981026785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4031.0, "completions/mean_length": 778.435302734375, "completions/mean_terminated_length": 565.9318237304688, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 8.307871720116617, "grad_norm": 0.19055113196372986, "learning_rate": 7.5e-07, "loss": -0.0043, "num_tokens": 476923352.0, "reward": 0.6575056314468384, "reward_std": 0.1572684645652771, "rewards/simpleverify_reward/mean": 0.6575055718421936, "rewards/simpleverify_reward/std": 0.47456052899360657, "step": 801 }, { "clip_ratio/high_max": 0.0016763532476034015, "clip_ratio/high_mean": 0.0006925877278263215, "clip_ratio/low_mean": 0.0005386040747907828, "clip_ratio/low_min": 1.5367591913673095e-05, "clip_ratio/region_mean": 0.0012311917962506413, "epoch": 8.317201166180759, "grad_norm": 0.14033429324626923, "learning_rate": 7.5e-07, "loss": -0.0142, "step": 802 }, { "clip_ratio/high_max": 0.001554703361762222, "clip_ratio/high_mean": 0.0006111730053817155, "clip_ratio/low_mean": 0.0005029977710364619, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011141707618662622, "epoch": 8.326530612244898, "grad_norm": 0.14961810410022736, "learning_rate": 7.5e-07, "loss": 0.0069, "step": 803 }, { "clip_ratio/high_max": 0.001861559157987358, "clip_ratio/high_mean": 0.0006974393254495226, "clip_ratio/low_mean": 0.0004366694583950448, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001134108784754062, "epoch": 8.335860058309038, "grad_norm": 0.17139099538326263, "learning_rate": 7.5e-07, "loss": -0.0456, "step": 804 }, { "clip_ratio/high_max": 0.001853428180766059, "clip_ratio/high_mean": 0.0007959915110404836, "clip_ratio/low_mean": 0.00039026061494951136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00118625217146473, "epoch": 8.345189504373177, "grad_norm": 0.17150598764419556, "learning_rate": 7.5e-07, "loss": -0.0566, "step": 805 }, { "clip_ratio/high_max": 0.0022581355297006667, "clip_ratio/high_mean": 0.0009279392379539786, "clip_ratio/low_mean": 0.0004464529888537072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013743922318099067, "epoch": 8.354518950437317, "grad_norm": 0.16611802577972412, "learning_rate": 7.5e-07, "loss": -0.0285, "step": 806 }, { "clip_ratio/high_max": 0.0019943050865549594, "clip_ratio/high_mean": 0.0008077756083366694, "clip_ratio/low_mean": 0.0004143783617109875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012221539436723106, "epoch": 8.363848396501458, "grad_norm": 0.15287677943706512, "learning_rate": 7.5e-07, "loss": -0.055, "step": 807 }, { "clip_ratio/high_max": 0.0019643079649540596, "clip_ratio/high_mean": 0.0007582406578876544, "clip_ratio/low_mean": 0.0005824161735290545, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001340656825050246, "epoch": 8.373177842565598, "grad_norm": 0.17471714317798615, "learning_rate": 7.5e-07, "loss": -0.0092, "step": 808 }, { "clip_ratio/high_max": 0.0016974394966382533, "clip_ratio/high_mean": 0.0006968913130549481, "clip_ratio/low_mean": 0.00046901241648811265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011659037409117445, "epoch": 8.382507288629737, "grad_norm": 0.1421576291322708, "learning_rate": 7.5e-07, "loss": -0.0215, "step": 809 }, { "clip_ratio/high_max": 0.0018209119007224217, "clip_ratio/high_mean": 0.0007840645303076599, "clip_ratio/low_mean": 0.0005507643045348232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013348288339329883, "epoch": 8.391836734693877, "grad_norm": 0.17301343381404877, "learning_rate": 7.5e-07, "loss": -0.0008, "step": 810 }, { "clip_ratio/high_max": 0.001977451567654498, "clip_ratio/high_mean": 0.0008345061069121584, "clip_ratio/low_mean": 0.0005985737734590657, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001433079884009203, "epoch": 8.401166180758018, "grad_norm": 0.1611846387386322, "learning_rate": 7.5e-07, "loss": -0.0345, "step": 811 }, { "clip_ratio/high_max": 0.00207273492924287, "clip_ratio/high_mean": 0.00081811733343784, "clip_ratio/low_mean": 0.000611917748756241, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00143003507037065, "epoch": 8.410495626822158, "grad_norm": 0.15623748302459717, "learning_rate": 7.5e-07, "loss": -0.0142, "step": 812 }, { "clip_ratio/high_max": 0.00217682864604285, "clip_ratio/high_mean": 0.0008717720811546315, "clip_ratio/low_mean": 0.0006009193357385811, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014726914087077603, "epoch": 8.419825072886297, "grad_norm": 0.1719358265399933, "learning_rate": 7.5e-07, "loss": -0.0361, "step": 813 }, { "clip_ratio/high_max": 0.0021689918867195956, "clip_ratio/high_mean": 0.0009015829837153433, "clip_ratio/low_mean": 0.0006766592814528849, "clip_ratio/low_min": 2.6209114366793074e-05, "clip_ratio/region_mean": 0.001578242296091048, "epoch": 8.429154518950437, "grad_norm": 0.17162537574768066, "learning_rate": 7.5e-07, "loss": 0.0006, "step": 814 }, { "clip_ratio/high_max": 0.002222531627921853, "clip_ratio/high_mean": 0.0008528907001164043, "clip_ratio/low_mean": 0.0007383059073617915, "clip_ratio/low_min": 3.7270459870342165e-05, "clip_ratio/region_mean": 0.0015911966002022382, "epoch": 8.438483965014576, "grad_norm": 0.16442124545574188, "learning_rate": 7.5e-07, "loss": -0.0141, "step": 815 }, { "clip_ratio/high_max": 0.0021592886187136173, "clip_ratio/high_mean": 0.0008357991719094571, "clip_ratio/low_mean": 0.0006150317158244434, "clip_ratio/low_min": 3.9723209738440346e-05, "clip_ratio/region_mean": 0.0014508308959193528, "epoch": 8.447813411078718, "grad_norm": 0.14195366203784943, "learning_rate": 7.5e-07, "loss": -0.0208, "step": 816 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0626395089285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4078.0, "completions/mean_length": 798.216552734375, "completions/mean_terminated_length": 577.8407592773438, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 8.457142857142857, "grad_norm": 0.14743660390377045, "learning_rate": 7.5e-07, "loss": 0.0018, "num_tokens": 486135792.0, "reward": 0.6312779188156128, "reward_std": 0.15155144035816193, "rewards/simpleverify_reward/mean": 0.6312779188156128, "rewards/simpleverify_reward/std": 0.4824751913547516, "step": 817 }, { "clip_ratio/high_max": 0.0020197587873553857, "clip_ratio/high_mean": 0.0007845181298762327, "clip_ratio/low_mean": 0.00041077728565142024, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001195295415527653, "epoch": 8.466472303206997, "grad_norm": 0.14429257810115814, "learning_rate": 7.5e-07, "loss": -0.0917, "step": 818 }, { "clip_ratio/high_max": 0.001840015638663317, "clip_ratio/high_mean": 0.0006299031483649742, "clip_ratio/low_mean": 0.0005428195945569314, "clip_ratio/low_min": 7.324483067350229e-05, "clip_ratio/region_mean": 0.0011727227465598844, "epoch": 8.475801749271136, "grad_norm": 0.1863093078136444, "learning_rate": 7.5e-07, "loss": 0.012, "step": 819 }, { "clip_ratio/high_max": 0.0013931638095527887, "clip_ratio/high_mean": 0.0005384778905863641, "clip_ratio/low_mean": 0.0006169883581605973, "clip_ratio/low_min": 3.5288408980704844e-05, "clip_ratio/region_mean": 0.0011554662414710037, "epoch": 8.485131195335278, "grad_norm": 0.176961749792099, "learning_rate": 7.5e-07, "loss": 0.025, "step": 820 }, { "clip_ratio/high_max": 0.0017491420985606965, "clip_ratio/high_mean": 0.0006249336529435823, "clip_ratio/low_mean": 0.0005587574632954784, "clip_ratio/low_min": 3.137015664833598e-05, "clip_ratio/region_mean": 0.0011836911107820924, "epoch": 8.494460641399417, "grad_norm": 0.16199730336666107, "learning_rate": 7.5e-07, "loss": -0.0086, "step": 821 }, { "clip_ratio/high_max": 0.001776929828338325, "clip_ratio/high_mean": 0.0006189213163452223, "clip_ratio/low_mean": 0.0005856739999217098, "clip_ratio/low_min": 1.3799955922877416e-05, "clip_ratio/region_mean": 0.0012045952898915857, "epoch": 8.503790087463557, "grad_norm": 0.15670892596244812, "learning_rate": 7.5e-07, "loss": -0.0187, "step": 822 }, { "clip_ratio/high_max": 0.001709742791717872, "clip_ratio/high_mean": 0.0006807213540014345, "clip_ratio/low_mean": 0.0005727220986955217, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001253443453606451, "epoch": 8.513119533527696, "grad_norm": 0.14288929104804993, "learning_rate": 7.5e-07, "loss": -0.0074, "step": 823 }, { "clip_ratio/high_max": 0.0021273476486385334, "clip_ratio/high_mean": 0.0008528951984771993, "clip_ratio/low_mean": 0.0005253645658740425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013782597743556835, "epoch": 8.522448979591836, "grad_norm": 0.15359222888946533, "learning_rate": 7.5e-07, "loss": -0.1059, "step": 824 }, { "clip_ratio/high_max": 0.0022392681348719634, "clip_ratio/high_mean": 0.0008184129210349056, "clip_ratio/low_mean": 0.0005401332423389249, "clip_ratio/low_min": 1.1101243217126466e-05, "clip_ratio/region_mean": 0.0013585461529146414, "epoch": 8.531778425655977, "grad_norm": 0.14568038284778595, "learning_rate": 7.5e-07, "loss": -0.0331, "step": 825 }, { "clip_ratio/high_max": 0.0019220653775846586, "clip_ratio/high_mean": 0.0008225721412600251, "clip_ratio/low_mean": 0.000733498842237168, "clip_ratio/low_min": 1.2731717106362339e-05, "clip_ratio/region_mean": 0.00155607102351496, "epoch": 8.541107871720117, "grad_norm": 0.2160366028547287, "learning_rate": 7.5e-07, "loss": -0.0502, "step": 826 }, { "clip_ratio/high_max": 0.002171565647586249, "clip_ratio/high_mean": 0.0008058385155891301, "clip_ratio/low_mean": 0.0006477207643911242, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014535592708853073, "epoch": 8.550437317784256, "grad_norm": 0.15067720413208008, "learning_rate": 7.5e-07, "loss": -0.0102, "step": 827 }, { "clip_ratio/high_max": 0.0018413002726447303, "clip_ratio/high_mean": 0.0006694317435176345, "clip_ratio/low_mean": 0.0006306230525296996, "clip_ratio/low_min": 3.990251570940018e-05, "clip_ratio/region_mean": 0.0013000547769479454, "epoch": 8.559766763848396, "grad_norm": 0.135688915848732, "learning_rate": 7.5e-07, "loss": 0.005, "step": 828 }, { "clip_ratio/high_max": 0.0018688186355575453, "clip_ratio/high_mean": 0.0007015652699919883, "clip_ratio/low_mean": 0.0006903894691276946, "clip_ratio/low_min": 1.0548523277975619e-05, "clip_ratio/region_mean": 0.0013919547272962518, "epoch": 8.569096209912537, "grad_norm": 0.1553802788257599, "learning_rate": 7.5e-07, "loss": 0.0084, "step": 829 }, { "clip_ratio/high_max": 0.0019054352742386982, "clip_ratio/high_mean": 0.0007120491245586891, "clip_ratio/low_mean": 0.000594214961893158, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001306264086451847, "epoch": 8.578425655976677, "grad_norm": 0.14696715772151947, "learning_rate": 7.5e-07, "loss": -0.0356, "step": 830 }, { "clip_ratio/high_max": 0.0018582278717076406, "clip_ratio/high_mean": 0.0007028370091575198, "clip_ratio/low_mean": 0.0007015783849055879, "clip_ratio/low_min": 1.3563368156610522e-05, "clip_ratio/region_mean": 0.0014044153540453408, "epoch": 8.587755102040816, "grad_norm": 0.18939997255802155, "learning_rate": 7.5e-07, "loss": 0.009, "step": 831 }, { "clip_ratio/high_max": 0.001812099744711304, "clip_ratio/high_mean": 0.0006952437606742023, "clip_ratio/low_mean": 0.0006422632231988246, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013375069465837441, "epoch": 8.597084548104956, "grad_norm": 0.13406866788864136, "learning_rate": 7.5e-07, "loss": -0.0358, "step": 832 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0734514508928571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4015.0, "completions/mean_length": 834.4839477539062, "completions/mean_terminated_length": 575.9295654296875, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 8.606413994169095, "grad_norm": 0.1464221179485321, "learning_rate": 7.5e-07, "loss": -0.0582, "num_tokens": 495205825.0, "reward": 0.6285575032234192, "reward_std": 0.15707026422023773, "rewards/simpleverify_reward/mean": 0.6285575032234192, "rewards/simpleverify_reward/std": 0.48320725560188293, "step": 833 }, { "clip_ratio/high_max": 0.0015690221262047999, "clip_ratio/high_mean": 0.0005976830843792413, "clip_ratio/low_mean": 0.0004811142644030042, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001078797344234772, "epoch": 8.615743440233237, "grad_norm": 0.14834553003311157, "learning_rate": 7.5e-07, "loss": -0.012, "step": 834 }, { "clip_ratio/high_max": 0.0018350300124438945, "clip_ratio/high_mean": 0.0007464935461030109, "clip_ratio/low_mean": 0.0004594036463458906, "clip_ratio/low_min": 3.2055746487458237e-05, "clip_ratio/region_mean": 0.001205897187901428, "epoch": 8.625072886297376, "grad_norm": 0.15197254717350006, "learning_rate": 7.5e-07, "loss": -0.0281, "step": 835 }, { "clip_ratio/high_max": 0.0021186085141380318, "clip_ratio/high_mean": 0.0007961754181451397, "clip_ratio/low_mean": 0.0003821309201157419, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011783063200709876, "epoch": 8.634402332361516, "grad_norm": 0.15256810188293457, "learning_rate": 7.5e-07, "loss": -0.0403, "step": 836 }, { "clip_ratio/high_max": 0.0018335424101678655, "clip_ratio/high_mean": 0.0006637335172854364, "clip_ratio/low_mean": 0.0005099054887978127, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001173638978798408, "epoch": 8.643731778425655, "grad_norm": 0.1569153219461441, "learning_rate": 7.5e-07, "loss": -0.0081, "step": 837 }, { "clip_ratio/high_max": 0.002266490992042236, "clip_ratio/high_mean": 0.0009206256672769086, "clip_ratio/low_mean": 0.0004452459515960072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013658716197824106, "epoch": 8.653061224489797, "grad_norm": 0.1565588116645813, "learning_rate": 7.5e-07, "loss": -0.0533, "step": 838 }, { "clip_ratio/high_max": 0.0017400081706000492, "clip_ratio/high_mean": 0.0007169405344029656, "clip_ratio/low_mean": 0.0005230852138993214, "clip_ratio/low_min": 9.594718903827015e-06, "clip_ratio/region_mean": 0.0012400257837725803, "epoch": 8.662390670553936, "grad_norm": 0.15673516690731049, "learning_rate": 7.5e-07, "loss": -0.0223, "step": 839 }, { "clip_ratio/high_max": 0.0016748279122111853, "clip_ratio/high_mean": 0.0006913425295351772, "clip_ratio/low_mean": 0.0005027930915275647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011941356060560793, "epoch": 8.671720116618076, "grad_norm": 0.16844604909420013, "learning_rate": 7.5e-07, "loss": 0.0134, "step": 840 }, { "clip_ratio/high_max": 0.001734967707307078, "clip_ratio/high_mean": 0.0007502139324060408, "clip_ratio/low_mean": 0.00048240528212772915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012326192045293283, "epoch": 8.681049562682215, "grad_norm": 0.18074902892112732, "learning_rate": 7.5e-07, "loss": -0.0412, "step": 841 }, { "clip_ratio/high_max": 0.0020704258531623054, "clip_ratio/high_mean": 0.0007321401844819775, "clip_ratio/low_mean": 0.0006252181410673074, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013573582727985922, "epoch": 8.690379008746355, "grad_norm": 0.19468273222446442, "learning_rate": 7.5e-07, "loss": -0.0205, "step": 842 }, { "clip_ratio/high_max": 0.0020111117737542372, "clip_ratio/high_mean": 0.0007532043036917457, "clip_ratio/low_mean": 0.0006771319049221347, "clip_ratio/low_min": 1.1677877409965731e-05, "clip_ratio/region_mean": 0.001430336204066407, "epoch": 8.699708454810496, "grad_norm": 0.15880000591278076, "learning_rate": 7.5e-07, "loss": -0.0477, "step": 843 }, { "clip_ratio/high_max": 0.0017715568428684492, "clip_ratio/high_mean": 0.0007654289756828803, "clip_ratio/low_mean": 0.000523810268532543, "clip_ratio/low_min": 5.70379379496444e-05, "clip_ratio/region_mean": 0.001289239229663508, "epoch": 8.709037900874636, "grad_norm": 0.18041208386421204, "learning_rate": 7.5e-07, "loss": -0.0653, "step": 844 }, { "clip_ratio/high_max": 0.002138702020602068, "clip_ratio/high_mean": 0.0008491840271744877, "clip_ratio/low_mean": 0.0005986913715787523, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014478754019364715, "epoch": 8.718367346938775, "grad_norm": 0.1621696650981903, "learning_rate": 7.5e-07, "loss": -0.054, "step": 845 }, { "clip_ratio/high_max": 0.0021597430495603476, "clip_ratio/high_mean": 0.0007440086319547845, "clip_ratio/low_mean": 0.0005448160891319276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001288824736548122, "epoch": 8.727696793002915, "grad_norm": 0.1472221165895462, "learning_rate": 7.5e-07, "loss": -0.0165, "step": 846 }, { "clip_ratio/high_max": 0.001853753528848756, "clip_ratio/high_mean": 0.0007936309430078836, "clip_ratio/low_mean": 0.0006157243760753772, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014093552890699357, "epoch": 8.737026239067056, "grad_norm": 0.20731234550476074, "learning_rate": 7.5e-07, "loss": -0.01, "step": 847 }, { "clip_ratio/high_max": 0.002452915177855175, "clip_ratio/high_mean": 0.0008988457684608875, "clip_ratio/low_mean": 0.0005814039986944408, "clip_ratio/low_min": 3.147029201500118e-05, "clip_ratio/region_mean": 0.0014802497826167382, "epoch": 8.746355685131196, "grad_norm": 0.1598658263683319, "learning_rate": 7.5e-07, "loss": -0.0546, "step": 848 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0650809151785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 805.913330078125, "completions/mean_terminated_length": 576.8861694335938, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 8.755685131195335, "grad_norm": 0.16640372574329376, "learning_rate": 7.5e-07, "loss": -0.0196, "num_tokens": 504336430.0, "reward": 0.6517857313156128, "reward_std": 0.1454060822725296, "rewards/simpleverify_reward/mean": 0.6517857313156128, "rewards/simpleverify_reward/std": 0.47642090916633606, "step": 849 }, { "clip_ratio/high_max": 0.0017207607452291995, "clip_ratio/high_mean": 0.0006007511665302445, "clip_ratio/low_mean": 0.00038283634376057307, "clip_ratio/low_min": 1.617494854144752e-05, "clip_ratio/region_mean": 0.0009835875280259643, "epoch": 8.765014577259475, "grad_norm": 0.1364213228225708, "learning_rate": 7.5e-07, "loss": -0.0288, "step": 850 }, { "clip_ratio/high_max": 0.0020645309996325523, "clip_ratio/high_mean": 0.0007297518113773549, "clip_ratio/low_mean": 0.0004907256043225061, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00122047738841502, "epoch": 8.774344023323614, "grad_norm": 0.15398328006267548, "learning_rate": 7.5e-07, "loss": -0.0411, "step": 851 }, { "clip_ratio/high_max": 0.0018938304783659987, "clip_ratio/high_mean": 0.0007575108520541107, "clip_ratio/low_mean": 0.0004103918249711569, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011679026756610256, "epoch": 8.783673469387756, "grad_norm": 0.15911385416984558, "learning_rate": 7.5e-07, "loss": -0.0628, "step": 852 }, { "clip_ratio/high_max": 0.001627883326364099, "clip_ratio/high_mean": 0.000639898080862622, "clip_ratio/low_mean": 0.00047881505815894343, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011187131276528817, "epoch": 8.793002915451895, "grad_norm": 0.16956353187561035, "learning_rate": 7.5e-07, "loss": -0.007, "step": 853 }, { "clip_ratio/high_max": 0.0018299127768841572, "clip_ratio/high_mean": 0.0007358697330346331, "clip_ratio/low_mean": 0.0004568539898173185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011927237101190258, "epoch": 8.802332361516035, "grad_norm": 0.1600894182920456, "learning_rate": 7.5e-07, "loss": -0.0285, "step": 854 }, { "clip_ratio/high_max": 0.0021396908268798143, "clip_ratio/high_mean": 0.0007470229575119447, "clip_ratio/low_mean": 0.0005825737462146208, "clip_ratio/low_min": 2.0997817046009004e-05, "clip_ratio/region_mean": 0.001329596692812629, "epoch": 8.811661807580174, "grad_norm": 0.15897150337696075, "learning_rate": 7.5e-07, "loss": 0.0116, "step": 855 }, { "clip_ratio/high_max": 0.0018247455373057164, "clip_ratio/high_mean": 0.000676374920658418, "clip_ratio/low_mean": 0.0005540029960684478, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012303779476496857, "epoch": 8.820991253644316, "grad_norm": 0.15521934628486633, "learning_rate": 7.5e-07, "loss": -0.049, "step": 856 }, { "clip_ratio/high_max": 0.0017216810119862203, "clip_ratio/high_mean": 0.0006376517922035418, "clip_ratio/low_mean": 0.00048062425594253, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011182760372321354, "epoch": 8.830320699708455, "grad_norm": 0.14478684961795807, "learning_rate": 7.5e-07, "loss": -0.0127, "step": 857 }, { "clip_ratio/high_max": 0.0020060584611201193, "clip_ratio/high_mean": 0.0007976130555107375, "clip_ratio/low_mean": 0.0005861452946192003, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001383758346491959, "epoch": 8.839650145772595, "grad_norm": 0.16231799125671387, "learning_rate": 7.5e-07, "loss": -0.0475, "step": 858 }, { "clip_ratio/high_max": 0.001754676970449509, "clip_ratio/high_mean": 0.0007032658577372786, "clip_ratio/low_mean": 0.000540597894541861, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012438637641025707, "epoch": 8.848979591836734, "grad_norm": 0.14798182249069214, "learning_rate": 7.5e-07, "loss": -0.0107, "step": 859 }, { "clip_ratio/high_max": 0.0015468864403374027, "clip_ratio/high_mean": 0.0006349746363412123, "clip_ratio/low_mean": 0.0005126160522195278, "clip_ratio/low_min": 1.1539881597855128e-05, "clip_ratio/region_mean": 0.0011475906867417507, "epoch": 8.858309037900874, "grad_norm": 0.1409570276737213, "learning_rate": 7.5e-07, "loss": -0.0065, "step": 860 }, { "clip_ratio/high_max": 0.0019214923195249867, "clip_ratio/high_mean": 0.0007631503121956484, "clip_ratio/low_mean": 0.0005996798705609763, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013628301967401057, "epoch": 8.867638483965015, "grad_norm": 0.14452746510505676, "learning_rate": 7.5e-07, "loss": -0.061, "step": 861 }, { "clip_ratio/high_max": 0.0018079954825225286, "clip_ratio/high_mean": 0.0007564757797808852, "clip_ratio/low_mean": 0.0006024361046002014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013589118716481607, "epoch": 8.876967930029155, "grad_norm": 0.14397919178009033, "learning_rate": 7.5e-07, "loss": -0.0224, "step": 862 }, { "clip_ratio/high_max": 0.0017017508471326437, "clip_ratio/high_mean": 0.0006359168955896166, "clip_ratio/low_mean": 0.0006671437849945505, "clip_ratio/low_min": 1.4004032891534735e-05, "clip_ratio/region_mean": 0.0013030606787651777, "epoch": 8.886297376093294, "grad_norm": 0.18207333981990814, "learning_rate": 7.5e-07, "loss": 0.0471, "step": 863 }, { "clip_ratio/high_max": 0.0018545937491580844, "clip_ratio/high_mean": 0.0007566359618067509, "clip_ratio/low_mean": 0.0006739680884493282, "clip_ratio/low_min": 3.269786157034105e-05, "clip_ratio/region_mean": 0.0014306040357041638, "epoch": 8.895626822157434, "grad_norm": 0.16728153824806213, "learning_rate": 7.5e-07, "loss": 0.0027, "step": 864 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4004.0, "completions/mean_length": 833.2066040039062, "completions/mean_terminated_length": 577.9940795898438, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 9.00932944606414, "grad_norm": 0.1675836145877838, "learning_rate": 7.5e-07, "loss": 0.0137, "num_tokens": 513479383.0, "reward": 0.6341378688812256, "reward_std": 0.15359030663967133, "rewards/simpleverify_reward/mean": 0.6341378092765808, "rewards/simpleverify_reward/std": 0.48168787360191345, "step": 865 }, { "clip_ratio/high_max": 0.0019845901042572223, "clip_ratio/high_mean": 0.0007294581082533114, "clip_ratio/low_mean": 0.0004399054419081949, "clip_ratio/low_min": 1.400089604430832e-05, "clip_ratio/region_mean": 0.0011693635387928225, "epoch": 9.018658892128279, "grad_norm": 0.1395116001367569, "learning_rate": 7.5e-07, "loss": -0.0026, "step": 866 }, { "clip_ratio/high_max": 0.001939541740284767, "clip_ratio/high_mean": 0.0007587728032376617, "clip_ratio/low_mean": 0.00046045550493545306, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012192283029435202, "epoch": 9.02798833819242, "grad_norm": 0.17707183957099915, "learning_rate": 7.5e-07, "loss": -0.0269, "step": 867 }, { "clip_ratio/high_max": 0.001744936678733211, "clip_ratio/high_mean": 0.0006737496769346762, "clip_ratio/low_mean": 0.0004260482173776836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010997979115927592, "epoch": 9.03731778425656, "grad_norm": 0.19795486330986023, "learning_rate": 7.5e-07, "loss": -0.0482, "step": 868 }, { "clip_ratio/high_max": 0.0018912615960289259, "clip_ratio/high_mean": 0.0006883464211568935, "clip_ratio/low_mean": 0.0005242409879429033, "clip_ratio/low_min": 1.9555694962036796e-05, "clip_ratio/region_mean": 0.001212587405461818, "epoch": 9.0466472303207, "grad_norm": 0.1485835462808609, "learning_rate": 7.5e-07, "loss": -0.0233, "step": 869 }, { "clip_ratio/high_max": 0.0018209236659458838, "clip_ratio/high_mean": 0.0007843976909498451, "clip_ratio/low_mean": 0.00043537844067031983, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001219776131620165, "epoch": 9.055976676384839, "grad_norm": 0.16000379621982574, "learning_rate": 7.5e-07, "loss": -0.0322, "step": 870 }, { "clip_ratio/high_max": 0.0017503316958027426, "clip_ratio/high_mean": 0.0008068082788668107, "clip_ratio/low_mean": 0.0005502683243321371, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013570765950134955, "epoch": 9.06530612244898, "grad_norm": 0.1756790280342102, "learning_rate": 7.5e-07, "loss": -0.0372, "step": 871 }, { "clip_ratio/high_max": 0.0019254113285569474, "clip_ratio/high_mean": 0.0008270867510873359, "clip_ratio/low_mean": 0.0004454954569155234, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012725821870844811, "epoch": 9.07463556851312, "grad_norm": 0.16848498582839966, "learning_rate": 7.5e-07, "loss": -0.0686, "step": 872 }, { "clip_ratio/high_max": 0.0019352103990968317, "clip_ratio/high_mean": 0.0007017505331532448, "clip_ratio/low_mean": 0.0006351798401738051, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013369303633226082, "epoch": 9.08396501457726, "grad_norm": 0.14615021646022797, "learning_rate": 7.5e-07, "loss": -0.0254, "step": 873 }, { "clip_ratio/high_max": 0.0019351525952515658, "clip_ratio/high_mean": 0.000786844058893621, "clip_ratio/low_mean": 0.0005309650200615579, "clip_ratio/low_min": 2.7229159968555905e-05, "clip_ratio/region_mean": 0.0013178090848668944, "epoch": 9.093294460641399, "grad_norm": 0.14412963390350342, "learning_rate": 7.5e-07, "loss": -0.0419, "step": 874 }, { "clip_ratio/high_max": 0.002256176132505061, "clip_ratio/high_mean": 0.0008868444492691197, "clip_ratio/low_mean": 0.0004098419858564739, "clip_ratio/low_min": 1.036312369251391e-05, "clip_ratio/region_mean": 0.0012966864342160989, "epoch": 9.102623906705539, "grad_norm": 0.15333758294582367, "learning_rate": 7.5e-07, "loss": -0.0702, "step": 875 }, { "clip_ratio/high_max": 0.0017864763649413362, "clip_ratio/high_mean": 0.0006965833144931821, "clip_ratio/low_mean": 0.0006233550161596213, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013199383356550243, "epoch": 9.11195335276968, "grad_norm": 0.1474297195672989, "learning_rate": 7.5e-07, "loss": -0.0082, "step": 876 }, { "clip_ratio/high_max": 0.0019032475356652867, "clip_ratio/high_mean": 0.0007717328517173883, "clip_ratio/low_mean": 0.0006527717923745513, "clip_ratio/low_min": 4.902253931504674e-05, "clip_ratio/region_mean": 0.0014245046622818336, "epoch": 9.12128279883382, "grad_norm": 0.1690133959054947, "learning_rate": 7.5e-07, "loss": 0.0069, "step": 877 }, { "clip_ratio/high_max": 0.0021223424264462665, "clip_ratio/high_mean": 0.0007482026521756779, "clip_ratio/low_mean": 0.0006619834366574651, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001410186043358408, "epoch": 9.130612244897959, "grad_norm": 0.16715925931930542, "learning_rate": 7.5e-07, "loss": -0.0141, "step": 878 }, { "clip_ratio/high_max": 0.002162692566344049, "clip_ratio/high_mean": 0.000939394609304145, "clip_ratio/low_mean": 0.0006147217054603971, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001554116348415846, "epoch": 9.139941690962099, "grad_norm": 0.17435312271118164, "learning_rate": 7.5e-07, "loss": -0.05, "step": 879 }, { "clip_ratio/high_max": 0.0021055892793810926, "clip_ratio/high_mean": 0.0008630731936136726, "clip_ratio/low_mean": 0.0006955916032893583, "clip_ratio/low_min": 2.1746694983448833e-05, "clip_ratio/region_mean": 0.0015586647859890945, "epoch": 9.14927113702624, "grad_norm": 0.16486002504825592, "learning_rate": 7.5e-07, "loss": -0.0055, "step": 880 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0705915178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4064.0, "completions/mean_length": 823.3965454101562, "completions/mean_terminated_length": 574.8318481445312, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 9.15860058309038, "grad_norm": 0.1702890843153, "learning_rate": 7.5e-07, "loss": -0.0579, "num_tokens": 522582627.0, "reward": 0.6478794813156128, "reward_std": 0.15938900411128998, "rewards/simpleverify_reward/mean": 0.6478794813156128, "rewards/simpleverify_reward/std": 0.47764796018600464, "step": 881 }, { "clip_ratio/high_max": 0.0019835606144624762, "clip_ratio/high_mean": 0.0006947408837731928, "clip_ratio/low_mean": 0.000433071357292647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001127812258346239, "epoch": 9.167930029154519, "grad_norm": 0.16139176487922668, "learning_rate": 7.5e-07, "loss": -0.0166, "step": 882 }, { "clip_ratio/high_max": 0.0016480522390338592, "clip_ratio/high_mean": 0.0007091421248333063, "clip_ratio/low_mean": 0.00038184971799637424, "clip_ratio/low_min": 4.1608463106967974e-05, "clip_ratio/region_mean": 0.001090991856472101, "epoch": 9.177259475218658, "grad_norm": 0.14212752878665924, "learning_rate": 7.5e-07, "loss": -0.0177, "step": 883 }, { "clip_ratio/high_max": 0.001936475673574023, "clip_ratio/high_mean": 0.0007684732918278314, "clip_ratio/low_mean": 0.00045782869256072445, "clip_ratio/low_min": 2.8153153834864497e-05, "clip_ratio/region_mean": 0.0012263020071259234, "epoch": 9.186588921282798, "grad_norm": 0.14305353164672852, "learning_rate": 7.5e-07, "loss": -0.0404, "step": 884 }, { "clip_ratio/high_max": 0.0023189889907371253, "clip_ratio/high_mean": 0.0008008147033251589, "clip_ratio/low_mean": 0.00048267222405229404, "clip_ratio/low_min": 1.4661036402685568e-05, "clip_ratio/region_mean": 0.0012834869012294803, "epoch": 9.19591836734694, "grad_norm": 0.1544424444437027, "learning_rate": 7.5e-07, "loss": -0.0172, "step": 885 }, { "clip_ratio/high_max": 0.001738669838232454, "clip_ratio/high_mean": 0.0006940857492736541, "clip_ratio/low_mean": 0.00045227875307318754, "clip_ratio/low_min": 3.227968409191817e-05, "clip_ratio/region_mean": 0.0011463645241747145, "epoch": 9.205247813411079, "grad_norm": 0.13887962698936462, "learning_rate": 7.5e-07, "loss": -0.0219, "step": 886 }, { "clip_ratio/high_max": 0.0018338000809308141, "clip_ratio/high_mean": 0.0007929154835437657, "clip_ratio/low_mean": 0.0005099900238292321, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013029054935032036, "epoch": 9.214577259475218, "grad_norm": 0.17708319425582886, "learning_rate": 7.5e-07, "loss": -0.0071, "step": 887 }, { "clip_ratio/high_max": 0.001997584186028689, "clip_ratio/high_mean": 0.0008014569557417417, "clip_ratio/low_mean": 0.0005079872862552293, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001309444251091918, "epoch": 9.223906705539358, "grad_norm": 0.17351391911506653, "learning_rate": 7.5e-07, "loss": -0.0311, "step": 888 }, { "clip_ratio/high_max": 0.0019228676246711984, "clip_ratio/high_mean": 0.0007740015862509608, "clip_ratio/low_mean": 0.0006183327268445282, "clip_ratio/low_min": 1.6108248019008897e-05, "clip_ratio/region_mean": 0.0013923343067290261, "epoch": 9.2332361516035, "grad_norm": 0.1752086579799652, "learning_rate": 7.5e-07, "loss": 0.0035, "step": 889 }, { "clip_ratio/high_max": 0.0017998751609411556, "clip_ratio/high_mean": 0.0007426379961543716, "clip_ratio/low_mean": 0.0005576317144004861, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013002697232877836, "epoch": 9.242565597667639, "grad_norm": 0.15948662161827087, "learning_rate": 7.5e-07, "loss": -0.0484, "step": 890 }, { "clip_ratio/high_max": 0.0017671710193098988, "clip_ratio/high_mean": 0.0007292971240531188, "clip_ratio/low_mean": 0.000525132782286164, "clip_ratio/low_min": 1.0654619472916238e-05, "clip_ratio/region_mean": 0.0012544299097498879, "epoch": 9.251895043731778, "grad_norm": 0.15363934636116028, "learning_rate": 7.5e-07, "loss": -0.0127, "step": 891 }, { "clip_ratio/high_max": 0.0020074684434803203, "clip_ratio/high_mean": 0.0007977488330652704, "clip_ratio/low_mean": 0.0004647742325687432, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012625230956473388, "epoch": 9.261224489795918, "grad_norm": 0.15549160540103912, "learning_rate": 7.5e-07, "loss": -0.0798, "step": 892 }, { "clip_ratio/high_max": 0.002337762067327276, "clip_ratio/high_mean": 0.0010068980100186309, "clip_ratio/low_mean": 0.0006656186678810627, "clip_ratio/low_min": 3.43878318744828e-05, "clip_ratio/region_mean": 0.0016725166660762625, "epoch": 9.270553935860057, "grad_norm": 0.15994495153427124, "learning_rate": 7.5e-07, "loss": -0.0657, "step": 893 }, { "clip_ratio/high_max": 0.0018549059714132454, "clip_ratio/high_mean": 0.0007658242629986489, "clip_ratio/low_mean": 0.000623412245658983, "clip_ratio/low_min": 6.26043402007781e-05, "clip_ratio/region_mean": 0.0013892365277570207, "epoch": 9.279883381924199, "grad_norm": 0.17705704271793365, "learning_rate": 7.5e-07, "loss": 0.004, "step": 894 }, { "clip_ratio/high_max": 0.0018817505515471566, "clip_ratio/high_mean": 0.0007645322093594586, "clip_ratio/low_mean": 0.0006255672792576661, "clip_ratio/low_min": 1.637840614421293e-05, "clip_ratio/region_mean": 0.0013900995181757025, "epoch": 9.289212827988338, "grad_norm": 0.17958742380142212, "learning_rate": 7.5e-07, "loss": -0.0288, "step": 895 }, { "clip_ratio/high_max": 0.0018449932140356395, "clip_ratio/high_mean": 0.0007514732824347448, "clip_ratio/low_mean": 0.0006335722619041917, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013850455288775265, "epoch": 9.298542274052478, "grad_norm": 0.1615930050611496, "learning_rate": 7.5e-07, "loss": -0.0041, "step": 896 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0742885044642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4087.0, "completions/mean_length": 823.7885131835938, "completions/mean_terminated_length": 561.1929321289062, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 9.307871720116617, "grad_norm": 0.16518811881542206, "learning_rate": 7.5e-07, "loss": -0.0637, "num_tokens": 531455763.0, "reward": 0.6457170844078064, "reward_std": 0.1427905410528183, "rewards/simpleverify_reward/mean": 0.6457170844078064, "rewards/simpleverify_reward/std": 0.4783121347427368, "step": 897 }, { "clip_ratio/high_max": 0.0017037141842592973, "clip_ratio/high_mean": 0.0005894107998756226, "clip_ratio/low_mean": 0.00042755957792905974, "clip_ratio/low_min": 1.3619524906971492e-05, "clip_ratio/region_mean": 0.0010169703891733661, "epoch": 9.317201166180759, "grad_norm": 0.14786043763160706, "learning_rate": 7.5e-07, "loss": -0.0122, "step": 898 }, { "clip_ratio/high_max": 0.002057450448774034, "clip_ratio/high_mean": 0.000804895777037018, "clip_ratio/low_mean": 0.0004235142318975704, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001228409990289947, "epoch": 9.326530612244898, "grad_norm": 0.1597188413143158, "learning_rate": 7.5e-07, "loss": -0.0573, "step": 899 }, { "clip_ratio/high_max": 0.0014503898719340214, "clip_ratio/high_mean": 0.00051448251315378, "clip_ratio/low_mean": 0.0004986034191460931, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010130859445780516, "epoch": 9.335860058309038, "grad_norm": 0.20595631003379822, "learning_rate": 7.5e-07, "loss": -0.0118, "step": 900 }, { "clip_ratio/high_max": 0.001786193963198457, "clip_ratio/high_mean": 0.0006320483862509718, "clip_ratio/low_mean": 0.0005513505084309145, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011833988537546247, "epoch": 9.345189504373177, "grad_norm": 0.15928173065185547, "learning_rate": 7.5e-07, "loss": -0.0146, "step": 901 }, { "clip_ratio/high_max": 0.0018480981998436619, "clip_ratio/high_mean": 0.0006243419993552379, "clip_ratio/low_mean": 0.0004479628005356062, "clip_ratio/low_min": 1.565239108458627e-05, "clip_ratio/region_mean": 0.0010723047780629713, "epoch": 9.354518950437317, "grad_norm": 0.15213532745838165, "learning_rate": 7.5e-07, "loss": -0.0689, "step": 902 }, { "clip_ratio/high_max": 0.0018492582530598156, "clip_ratio/high_mean": 0.0006234355223568855, "clip_ratio/low_mean": 0.0005393089586505084, "clip_ratio/low_min": 1.0548523277975619e-05, "clip_ratio/region_mean": 0.001162744501925772, "epoch": 9.363848396501458, "grad_norm": 0.1578218638896942, "learning_rate": 7.5e-07, "loss": -0.004, "step": 903 }, { "clip_ratio/high_max": 0.0015858829829085153, "clip_ratio/high_mean": 0.0006724854447384132, "clip_ratio/low_mean": 0.0004954201986038242, "clip_ratio/low_min": 4.280168468540069e-05, "clip_ratio/region_mean": 0.0011679056515276898, "epoch": 9.373177842565598, "grad_norm": 0.1270531862974167, "learning_rate": 7.5e-07, "loss": -0.0474, "step": 904 }, { "clip_ratio/high_max": 0.002012949207710335, "clip_ratio/high_mean": 0.0006066288933652686, "clip_ratio/low_mean": 0.0005824823838338489, "clip_ratio/low_min": 3.180084331688704e-05, "clip_ratio/region_mean": 0.0011891112881130539, "epoch": 9.382507288629737, "grad_norm": 0.15258049964904785, "learning_rate": 7.5e-07, "loss": -0.0062, "step": 905 }, { "clip_ratio/high_max": 0.0016881745359569322, "clip_ratio/high_mean": 0.0005441658959171036, "clip_ratio/low_mean": 0.0005261351116132573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010703009756980464, "epoch": 9.391836734693877, "grad_norm": 0.2538744807243347, "learning_rate": 7.5e-07, "loss": -0.0218, "step": 906 }, { "clip_ratio/high_max": 0.0018167584639741108, "clip_ratio/high_mean": 0.0007034095306153176, "clip_ratio/low_mean": 0.0005094910866318969, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012129006245231722, "epoch": 9.401166180758018, "grad_norm": 0.1799042671918869, "learning_rate": 7.5e-07, "loss": -0.0321, "step": 907 }, { "clip_ratio/high_max": 0.0018029719394689891, "clip_ratio/high_mean": 0.0006292636608122848, "clip_ratio/low_mean": 0.0007463155980076408, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013755792460869998, "epoch": 9.410495626822158, "grad_norm": 0.16241194307804108, "learning_rate": 7.5e-07, "loss": -0.0127, "step": 908 }, { "clip_ratio/high_max": 0.0018363289855187759, "clip_ratio/high_mean": 0.0007397882582154125, "clip_ratio/low_mean": 0.0004722376220342994, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012120258870709222, "epoch": 9.419825072886297, "grad_norm": 0.1909274458885193, "learning_rate": 7.5e-07, "loss": -0.1011, "step": 909 }, { "clip_ratio/high_max": 0.001792800954717677, "clip_ratio/high_mean": 0.0006546555996465031, "clip_ratio/low_mean": 0.0005627764567179838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012174321127531584, "epoch": 9.429154518950437, "grad_norm": 0.14569957554340363, "learning_rate": 7.5e-07, "loss": -0.0143, "step": 910 }, { "clip_ratio/high_max": 0.0015669852145947516, "clip_ratio/high_mean": 0.0005956488803349202, "clip_ratio/low_mean": 0.0006712807135045296, "clip_ratio/low_min": 1.197088658955181e-05, "clip_ratio/region_mean": 0.0012669296120293438, "epoch": 9.438483965014576, "grad_norm": 0.16823944449424744, "learning_rate": 7.5e-07, "loss": 0.0064, "step": 911 }, { "clip_ratio/high_max": 0.0017173515698232222, "clip_ratio/high_mean": 0.0007360216659435537, "clip_ratio/low_mean": 0.0007537636101915268, "clip_ratio/low_min": 3.018594543391373e-05, "clip_ratio/region_mean": 0.00148978527795407, "epoch": 9.447813411078718, "grad_norm": 0.16414253413677216, "learning_rate": 7.5e-07, "loss": 0.0012, "step": 912 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0726143973214286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4075.0, "completions/mean_length": 826.6928100585938, "completions/mean_terminated_length": 570.7056884765625, "completions/min_length": 101.0, "completions/min_terminated_length": 101.0, "epoch": 9.457142857142857, "grad_norm": 0.20381993055343628, "learning_rate": 7.5e-07, "loss": -0.0113, "num_tokens": 540478087.0, "reward": 0.6501814126968384, "reward_std": 0.15186986327171326, "rewards/simpleverify_reward/mean": 0.6501813530921936, "rewards/simpleverify_reward/std": 0.4769291579723358, "step": 913 }, { "clip_ratio/high_max": 0.0016884102442418225, "clip_ratio/high_mean": 0.000676108589686919, "clip_ratio/low_mean": 0.0004301409371691989, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001106249514123192, "epoch": 9.466472303206997, "grad_norm": 0.16265030205249786, "learning_rate": 7.5e-07, "loss": -0.0207, "step": 914 }, { "clip_ratio/high_max": 0.001542275906103896, "clip_ratio/high_mean": 0.0007247342618939001, "clip_ratio/low_mean": 0.0005514568165381206, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001276191102078883, "epoch": 9.475801749271136, "grad_norm": 0.22472651302814484, "learning_rate": 7.5e-07, "loss": -0.0371, "step": 915 }, { "clip_ratio/high_max": 0.0016846637809067033, "clip_ratio/high_mean": 0.0005633461678371532, "clip_ratio/low_mean": 0.0005423301136033842, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011056762741645798, "epoch": 9.485131195335278, "grad_norm": 0.1702611893415451, "learning_rate": 7.5e-07, "loss": -0.0123, "step": 916 }, { "clip_ratio/high_max": 0.0021090209775138646, "clip_ratio/high_mean": 0.0008323334932356374, "clip_ratio/low_mean": 0.0004600053364356427, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012923388130730018, "epoch": 9.494460641399417, "grad_norm": 0.22097547352313995, "learning_rate": 7.5e-07, "loss": -0.0446, "step": 917 }, { "clip_ratio/high_max": 0.0019490940030664206, "clip_ratio/high_mean": 0.0007163509526435519, "clip_ratio/low_mean": 0.0005166328528503072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001232983784575481, "epoch": 9.503790087463557, "grad_norm": 0.16860434412956238, "learning_rate": 7.5e-07, "loss": -0.0395, "step": 918 }, { "clip_ratio/high_max": 0.0020291944965720177, "clip_ratio/high_mean": 0.0007020834382274188, "clip_ratio/low_mean": 0.0005859154007339384, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012879988498752937, "epoch": 9.513119533527696, "grad_norm": 0.17978881299495697, "learning_rate": 7.5e-07, "loss": -0.0101, "step": 919 }, { "clip_ratio/high_max": 0.0021859503613086417, "clip_ratio/high_mean": 0.0008477350402245065, "clip_ratio/low_mean": 0.0006031679022271419, "clip_ratio/low_min": 1.4501159967039712e-05, "clip_ratio/region_mean": 0.0014509029460896272, "epoch": 9.522448979591836, "grad_norm": 0.1668407917022705, "learning_rate": 7.5e-07, "loss": -0.0202, "step": 920 }, { "clip_ratio/high_max": 0.0017974044239963405, "clip_ratio/high_mean": 0.0007300680936168646, "clip_ratio/low_mean": 0.0007476339651475428, "clip_ratio/low_min": 1.4541647033183835e-05, "clip_ratio/region_mean": 0.0014777020624023862, "epoch": 9.531778425655977, "grad_norm": 0.1603546142578125, "learning_rate": 7.5e-07, "loss": -0.0297, "step": 921 }, { "clip_ratio/high_max": 0.0018427268660161644, "clip_ratio/high_mean": 0.0006910974734637421, "clip_ratio/low_mean": 0.0005421119913080474, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012332094775047153, "epoch": 9.541107871720117, "grad_norm": 0.14586414396762848, "learning_rate": 7.5e-07, "loss": -0.0522, "step": 922 }, { "clip_ratio/high_max": 0.002105371138895862, "clip_ratio/high_mean": 0.0008432159775111359, "clip_ratio/low_mean": 0.0005696030020772014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014128189795883372, "epoch": 9.550437317784256, "grad_norm": 0.1693994551897049, "learning_rate": 7.5e-07, "loss": -0.0402, "step": 923 }, { "clip_ratio/high_max": 0.002103536982758669, "clip_ratio/high_mean": 0.0007712419173913077, "clip_ratio/low_mean": 0.00041983774485743197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011910796602023765, "epoch": 9.559766763848396, "grad_norm": 0.14963465929031372, "learning_rate": 7.5e-07, "loss": -0.0492, "step": 924 }, { "clip_ratio/high_max": 0.0017837269042502157, "clip_ratio/high_mean": 0.0007541131490143016, "clip_ratio/low_mean": 0.0007050994536257349, "clip_ratio/low_min": 1.3658216630574316e-05, "clip_ratio/region_mean": 0.0014592126208299305, "epoch": 9.569096209912537, "grad_norm": 0.14762532711029053, "learning_rate": 7.5e-07, "loss": -0.0105, "step": 925 }, { "clip_ratio/high_max": 0.001979898395802593, "clip_ratio/high_mean": 0.0008004713135960628, "clip_ratio/low_mean": 0.0007784971403452801, "clip_ratio/low_min": 7.304176506295335e-05, "clip_ratio/region_mean": 0.0015789684330229647, "epoch": 9.578425655976677, "grad_norm": 0.16555672883987427, "learning_rate": 7.5e-07, "loss": 0.013, "step": 926 }, { "clip_ratio/high_max": 0.0022702102214680053, "clip_ratio/high_mean": 0.0007470211921827286, "clip_ratio/low_mean": 0.0006111452194090816, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013581664316006936, "epoch": 9.587755102040816, "grad_norm": 0.15509434044361115, "learning_rate": 7.5e-07, "loss": -0.0364, "step": 927 }, { "clip_ratio/high_max": 0.002225688822363736, "clip_ratio/high_mean": 0.0009347169452667003, "clip_ratio/low_mean": 0.000609793829426053, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001544510781968711, "epoch": 9.597084548104956, "grad_norm": 0.17363932728767395, "learning_rate": 7.5e-07, "loss": -0.0788, "step": 928 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0701729910714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3841.0, "completions/mean_length": 805.2437744140625, "completions/mean_terminated_length": 556.89404296875, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 9.606413994169095, "grad_norm": 0.17030362784862518, "learning_rate": 7.5e-07, "loss": -0.0529, "num_tokens": 549304893.0, "reward": 0.6581333875656128, "reward_std": 0.14414624869823456, "rewards/simpleverify_reward/mean": 0.6581333875656128, "rewards/simpleverify_reward/std": 0.474351704120636, "step": 929 }, { "clip_ratio/high_max": 0.001812534228520235, "clip_ratio/high_mean": 0.0006708847949994379, "clip_ratio/low_mean": 0.00044814190732722636, "clip_ratio/low_min": 1.5009605704108253e-05, "clip_ratio/region_mean": 0.0011190267177880742, "epoch": 9.615743440233237, "grad_norm": 0.15958209335803986, "learning_rate": 7.5e-07, "loss": -0.0056, "step": 930 }, { "clip_ratio/high_max": 0.0020800167621928267, "clip_ratio/high_mean": 0.0007123798259272007, "clip_ratio/low_mean": 0.0004949634601416619, "clip_ratio/low_min": 3.208419002476148e-05, "clip_ratio/region_mean": 0.0012073432826582575, "epoch": 9.625072886297376, "grad_norm": 0.160538449883461, "learning_rate": 7.5e-07, "loss": 0.0004, "step": 931 }, { "clip_ratio/high_max": 0.0014582872245227918, "clip_ratio/high_mean": 0.0005605086207651766, "clip_ratio/low_mean": 0.0004575216853481834, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010180303033848759, "epoch": 9.634402332361516, "grad_norm": 0.1545495241880417, "learning_rate": 7.5e-07, "loss": -0.0269, "step": 932 }, { "clip_ratio/high_max": 0.00183458016908844, "clip_ratio/high_mean": 0.0006439313510782085, "clip_ratio/low_mean": 0.0005057793823652901, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011497107407194562, "epoch": 9.643731778425655, "grad_norm": 0.26883018016815186, "learning_rate": 7.5e-07, "loss": -0.0335, "step": 933 }, { "clip_ratio/high_max": 0.0017646005180722568, "clip_ratio/high_mean": 0.000683684227624326, "clip_ratio/low_mean": 0.0004922463858747506, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001175930599856656, "epoch": 9.653061224489797, "grad_norm": 0.18085773289203644, "learning_rate": 7.5e-07, "loss": -0.0193, "step": 934 }, { "clip_ratio/high_max": 0.0018343083447689423, "clip_ratio/high_mean": 0.0006787232127862808, "clip_ratio/low_mean": 0.0005014110884076217, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011801343061961234, "epoch": 9.662390670553936, "grad_norm": 0.15247759222984314, "learning_rate": 7.5e-07, "loss": -0.0283, "step": 935 }, { "clip_ratio/high_max": 0.00166771926524234, "clip_ratio/high_mean": 0.0006160421289678197, "clip_ratio/low_mean": 0.0004802046223630896, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010962467567878775, "epoch": 9.671720116618076, "grad_norm": 0.15830597281455994, "learning_rate": 7.5e-07, "loss": -0.0024, "step": 936 }, { "clip_ratio/high_max": 0.0021894329984206706, "clip_ratio/high_mean": 0.0008023224072530866, "clip_ratio/low_mean": 0.0004508024949245737, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012531248867162503, "epoch": 9.681049562682215, "grad_norm": 0.16151604056358337, "learning_rate": 7.5e-07, "loss": -0.034, "step": 937 }, { "clip_ratio/high_max": 0.0018392219062661752, "clip_ratio/high_mean": 0.0007612966919623432, "clip_ratio/low_mean": 0.0004457540126168169, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012070507073076442, "epoch": 9.690379008746355, "grad_norm": 0.1479826420545578, "learning_rate": 7.5e-07, "loss": -0.0767, "step": 938 }, { "clip_ratio/high_max": 0.0019441155091044493, "clip_ratio/high_mean": 0.0006916653701409814, "clip_ratio/low_mean": 0.0005280294126350782, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012196947973279748, "epoch": 9.699708454810496, "grad_norm": 0.1696830689907074, "learning_rate": 7.5e-07, "loss": -0.016, "step": 939 }, { "clip_ratio/high_max": 0.002137280080205528, "clip_ratio/high_mean": 0.0008922868873924017, "clip_ratio/low_mean": 0.0006594497044716263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015517365973209962, "epoch": 9.709037900874636, "grad_norm": 0.16550149023532867, "learning_rate": 7.5e-07, "loss": -0.0517, "step": 940 }, { "clip_ratio/high_max": 0.0023039550069370307, "clip_ratio/high_mean": 0.0008639952138764784, "clip_ratio/low_mean": 0.0006476542012023856, "clip_ratio/low_min": 2.7446611966297496e-05, "clip_ratio/region_mean": 0.001511649439635221, "epoch": 9.718367346938775, "grad_norm": 0.16853004693984985, "learning_rate": 7.5e-07, "loss": -0.0183, "step": 941 }, { "clip_ratio/high_max": 0.002057221354334615, "clip_ratio/high_mean": 0.0007469208612747025, "clip_ratio/low_mean": 0.000627394760158495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013743156305281445, "epoch": 9.727696793002915, "grad_norm": 0.15052206814289093, "learning_rate": 7.5e-07, "loss": -0.055, "step": 942 }, { "clip_ratio/high_max": 0.0019482607931422535, "clip_ratio/high_mean": 0.0008289330980915111, "clip_ratio/low_mean": 0.000503821923302894, "clip_ratio/low_min": 1.5229044947773218e-05, "clip_ratio/region_mean": 0.001332755018665921, "epoch": 9.737026239067056, "grad_norm": 0.17224591970443726, "learning_rate": 7.5e-07, "loss": 0.0014, "step": 943 }, { "clip_ratio/high_max": 0.0019178372458554804, "clip_ratio/high_mean": 0.0006948651684979268, "clip_ratio/low_mean": 0.0007148531185521279, "clip_ratio/low_min": 5.8053778047906235e-05, "clip_ratio/region_mean": 0.0014097182647674344, "epoch": 9.746355685131196, "grad_norm": 0.15325337648391724, "learning_rate": 7.5e-07, "loss": 0.006, "step": 944 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0735212053571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4074.0, "completions/mean_length": 835.3515014648438, "completions/mean_terminated_length": 576.6010131835938, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 9.755685131195335, "grad_norm": 0.20119836926460266, "learning_rate": 7.5e-07, "loss": -0.0077, "num_tokens": 558396348.0, "reward": 0.636788547039032, "reward_std": 0.14322754740715027, "rewards/simpleverify_reward/mean": 0.6367884874343872, "rewards/simpleverify_reward/std": 0.4809418320655823, "step": 945 }, { "clip_ratio/high_max": 0.0017949568973563146, "clip_ratio/high_mean": 0.0006698315719404491, "clip_ratio/low_mean": 0.00035855523469763284, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001028386799589498, "epoch": 9.765014577259475, "grad_norm": 0.15362423658370972, "learning_rate": 7.5e-07, "loss": -0.054, "step": 946 }, { "clip_ratio/high_max": 0.001722935528960079, "clip_ratio/high_mean": 0.0005937741661909968, "clip_ratio/low_mean": 0.0004432648229339975, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010370389936724678, "epoch": 9.774344023323614, "grad_norm": 0.6168777346611023, "learning_rate": 7.5e-07, "loss": 0.0278, "step": 947 }, { "clip_ratio/high_max": 0.0016667770942149218, "clip_ratio/high_mean": 0.0006328227264020825, "clip_ratio/low_mean": 0.00040703433523958665, "clip_ratio/low_min": 1.236888965649996e-05, "clip_ratio/region_mean": 0.0010398570593679324, "epoch": 9.783673469387756, "grad_norm": 0.156535342335701, "learning_rate": 7.5e-07, "loss": -0.0413, "step": 948 }, { "clip_ratio/high_max": 0.0017148891784017906, "clip_ratio/high_mean": 0.000647943903459236, "clip_ratio/low_mean": 0.0004722110052171047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011201549277757294, "epoch": 9.793002915451895, "grad_norm": 0.14729459583759308, "learning_rate": 7.5e-07, "loss": -0.0589, "step": 949 }, { "clip_ratio/high_max": 0.002017307871938101, "clip_ratio/high_mean": 0.0009018645105243195, "clip_ratio/low_mean": 0.00042491373596931226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013267782705952413, "epoch": 9.802332361516035, "grad_norm": 0.15663863718509674, "learning_rate": 7.5e-07, "loss": -0.1324, "step": 950 }, { "clip_ratio/high_max": 0.002021702010097215, "clip_ratio/high_mean": 0.0007309941429411992, "clip_ratio/low_mean": 0.0004610975483956281, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011920916731469333, "epoch": 9.811661807580174, "grad_norm": 0.1454327255487442, "learning_rate": 7.5e-07, "loss": -0.0599, "step": 951 }, { "clip_ratio/high_max": 0.002111621986841783, "clip_ratio/high_mean": 0.0007044636258797254, "clip_ratio/low_mean": 0.0004378474342274785, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011423110772739165, "epoch": 9.820991253644316, "grad_norm": 0.16145697236061096, "learning_rate": 7.5e-07, "loss": -0.0362, "step": 952 }, { "clip_ratio/high_max": 0.001679302535194438, "clip_ratio/high_mean": 0.0006042265849828254, "clip_ratio/low_mean": 0.0005973916668153834, "clip_ratio/low_min": 4.074315438629128e-05, "clip_ratio/region_mean": 0.0012016182190563995, "epoch": 9.830320699708455, "grad_norm": 0.15648964047431946, "learning_rate": 7.5e-07, "loss": 0.0017, "step": 953 }, { "clip_ratio/high_max": 0.0018688560885493644, "clip_ratio/high_mean": 0.0007145849740481935, "clip_ratio/low_mean": 0.0005909879937462392, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013055729323241394, "epoch": 9.839650145772595, "grad_norm": 0.1766122728586197, "learning_rate": 7.5e-07, "loss": 0.0043, "step": 954 }, { "clip_ratio/high_max": 0.0018647989199962467, "clip_ratio/high_mean": 0.0007312464294955134, "clip_ratio/low_mean": 0.0005555288280447712, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012867752157035284, "epoch": 9.848979591836734, "grad_norm": 0.1307879388332367, "learning_rate": 7.5e-07, "loss": -0.0257, "step": 955 }, { "clip_ratio/high_max": 0.001710251861368306, "clip_ratio/high_mean": 0.0007364690409303876, "clip_ratio/low_mean": 0.0006098020767240087, "clip_ratio/low_min": 9.69292796071386e-06, "clip_ratio/region_mean": 0.0013462711103784386, "epoch": 9.858309037900874, "grad_norm": 0.23310987651348114, "learning_rate": 7.5e-07, "loss": -0.0442, "step": 956 }, { "clip_ratio/high_max": 0.0020220697624608874, "clip_ratio/high_mean": 0.0007335780010180315, "clip_ratio/low_mean": 0.0005874983644389431, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013210763790993951, "epoch": 9.867638483965015, "grad_norm": 0.16800667345523834, "learning_rate": 7.5e-07, "loss": -0.0084, "step": 957 }, { "clip_ratio/high_max": 0.002133222093107179, "clip_ratio/high_mean": 0.0008302668029500637, "clip_ratio/low_mean": 0.0006577375588676659, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014880043599987403, "epoch": 9.876967930029155, "grad_norm": 0.15481150150299072, "learning_rate": 7.5e-07, "loss": -0.0145, "step": 958 }, { "clip_ratio/high_max": 0.002157790666387882, "clip_ratio/high_mean": 0.0008444769246125361, "clip_ratio/low_mean": 0.0008066398095252225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001651116708671907, "epoch": 9.886297376093294, "grad_norm": 0.1669265329837799, "learning_rate": 7.5e-07, "loss": 0.0201, "step": 959 }, { "clip_ratio/high_max": 0.0021844661387149245, "clip_ratio/high_mean": 0.0008355123845831258, "clip_ratio/low_mean": 0.0006268367360462435, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014623491260863375, "epoch": 9.895626822157434, "grad_norm": 0.16624131798744202, "learning_rate": 7.5e-07, "loss": -0.0254, "step": 960 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0761021205357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4043.0, "completions/mean_length": 836.7007446289062, "completions/mean_terminated_length": 568.2299194335938, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 10.00932944606414, "grad_norm": 0.1887584626674652, "learning_rate": 7.5e-07, "loss": -0.0188, "num_tokens": 567391425.0, "reward": 0.640694797039032, "reward_std": 0.14633029699325562, "rewards/simpleverify_reward/mean": 0.6406947374343872, "rewards/simpleverify_reward/std": 0.4798135459423065, "step": 961 }, { "clip_ratio/high_max": 0.002044711283815559, "clip_ratio/high_mean": 0.00079813623415248, "clip_ratio/low_mean": 0.00038353143781932886, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011816676815215033, "epoch": 10.018658892128279, "grad_norm": 0.17504893243312836, "learning_rate": 7.5e-07, "loss": -0.0528, "step": 962 }, { "clip_ratio/high_max": 0.001778903322701808, "clip_ratio/high_mean": 0.0006091165269026533, "clip_ratio/low_mean": 0.00044678425410893396, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010559007750998717, "epoch": 10.02798833819242, "grad_norm": 0.14594250917434692, "learning_rate": 7.5e-07, "loss": -0.0325, "step": 963 }, { "clip_ratio/high_max": 0.001704761765722651, "clip_ratio/high_mean": 0.0006280112647800706, "clip_ratio/low_mean": 0.0003441668909545115, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009721781461848877, "epoch": 10.03731778425656, "grad_norm": 0.197650745511055, "learning_rate": 7.5e-07, "loss": -0.0212, "step": 964 }, { "clip_ratio/high_max": 0.0015850466179472278, "clip_ratio/high_mean": 0.000647898169745531, "clip_ratio/low_mean": 0.0004162742516200524, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010641724347806303, "epoch": 10.0466472303207, "grad_norm": 0.1427040547132492, "learning_rate": 7.5e-07, "loss": -0.0555, "step": 965 }, { "clip_ratio/high_max": 0.0020534890863928013, "clip_ratio/high_mean": 0.0008334254271176178, "clip_ratio/low_mean": 0.0004920281289741979, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013254535697342362, "epoch": 10.055976676384839, "grad_norm": 0.1578725129365921, "learning_rate": 7.5e-07, "loss": -0.0763, "step": 966 }, { "clip_ratio/high_max": 0.0019188011283404194, "clip_ratio/high_mean": 0.0007108524023351492, "clip_ratio/low_mean": 0.0005409228815551614, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012517753093561623, "epoch": 10.06530612244898, "grad_norm": 0.16799408197402954, "learning_rate": 7.5e-07, "loss": -0.007, "step": 967 }, { "clip_ratio/high_max": 0.001761615159921348, "clip_ratio/high_mean": 0.0007244888274726691, "clip_ratio/low_mean": 0.0005333939476486194, "clip_ratio/low_min": 2.0559211407089606e-05, "clip_ratio/region_mean": 0.0012578827809193172, "epoch": 10.07463556851312, "grad_norm": 0.18024101853370667, "learning_rate": 7.5e-07, "loss": -0.053, "step": 968 }, { "clip_ratio/high_max": 0.002064311103822547, "clip_ratio/high_mean": 0.0007158196904128999, "clip_ratio/low_mean": 0.0006683695373794762, "clip_ratio/low_min": 2.5920368898368906e-05, "clip_ratio/region_mean": 0.0013841892541677225, "epoch": 10.08396501457726, "grad_norm": 0.19472025334835052, "learning_rate": 7.5e-07, "loss": 0.0127, "step": 969 }, { "clip_ratio/high_max": 0.001881156102172099, "clip_ratio/high_mean": 0.0006630875604969333, "clip_ratio/low_mean": 0.0005725146702388884, "clip_ratio/low_min": 2.7195767870580312e-05, "clip_ratio/region_mean": 0.0012356022089079488, "epoch": 10.093294460641399, "grad_norm": 0.17317280173301697, "learning_rate": 7.5e-07, "loss": 0.0038, "step": 970 }, { "clip_ratio/high_max": 0.001571766821143683, "clip_ratio/high_mean": 0.0006044599795131944, "clip_ratio/low_mean": 0.0006312891327979742, "clip_ratio/low_min": 1.486325800215127e-05, "clip_ratio/region_mean": 0.0012357490886643063, "epoch": 10.102623906705539, "grad_norm": 0.15381604433059692, "learning_rate": 7.5e-07, "loss": -0.0089, "step": 971 }, { "clip_ratio/high_max": 0.0018635824344528373, "clip_ratio/high_mean": 0.0007205305246316129, "clip_ratio/low_mean": 0.0005344028204490314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012549333296192344, "epoch": 10.11195335276968, "grad_norm": 0.14960844814777374, "learning_rate": 7.5e-07, "loss": -0.058, "step": 972 }, { "clip_ratio/high_max": 0.002143511643225793, "clip_ratio/high_mean": 0.0007856447773519903, "clip_ratio/low_mean": 0.0007216957619675668, "clip_ratio/low_min": 2.710908665903844e-05, "clip_ratio/region_mean": 0.001507340512034716, "epoch": 10.12128279883382, "grad_norm": 0.14762112498283386, "learning_rate": 7.5e-07, "loss": -0.0508, "step": 973 }, { "clip_ratio/high_max": 0.0018401051856926642, "clip_ratio/high_mean": 0.0007609011645399733, "clip_ratio/low_mean": 0.0005558805405598832, "clip_ratio/low_min": 1.5968318621162325e-05, "clip_ratio/region_mean": 0.0013167817269277293, "epoch": 10.130612244897959, "grad_norm": 0.149007648229599, "learning_rate": 7.5e-07, "loss": -0.0238, "step": 974 }, { "clip_ratio/high_max": 0.002119459240930155, "clip_ratio/high_mean": 0.0007970879269123543, "clip_ratio/low_mean": 0.0006230652537624337, "clip_ratio/low_min": 1.2204647646285594e-05, "clip_ratio/region_mean": 0.001420153184881201, "epoch": 10.139941690962099, "grad_norm": 0.19831141829490662, "learning_rate": 7.5e-07, "loss": -0.0175, "step": 975 }, { "clip_ratio/high_max": 0.0019942672333854716, "clip_ratio/high_mean": 0.0007559655841760105, "clip_ratio/low_mean": 0.0006175389153213473, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001373504532239167, "epoch": 10.14927113702624, "grad_norm": 0.16659224033355713, "learning_rate": 7.5e-07, "loss": 0.0005, "step": 976 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0811244419642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4040.0, "completions/mean_length": 856.4550170898438, "completions/mean_terminated_length": 570.4464721679688, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 10.15860058309038, "grad_norm": 0.16063711047172546, "learning_rate": 7.5e-07, "loss": -0.0471, "num_tokens": 576343580.0, "reward": 0.6507394313812256, "reward_std": 0.1522856205701828, "rewards/simpleverify_reward/mean": 0.6507393717765808, "rewards/simpleverify_reward/std": 0.4767530560493469, "step": 977 }, { "clip_ratio/high_max": 0.0017855069927463774, "clip_ratio/high_mean": 0.0006443982510973001, "clip_ratio/low_mean": 0.00040330630690732505, "clip_ratio/low_min": 1.826417246775236e-05, "clip_ratio/region_mean": 0.0010477045852894662, "epoch": 10.167930029154519, "grad_norm": 0.167107492685318, "learning_rate": 7.5e-07, "loss": -0.0067, "step": 978 }, { "clip_ratio/high_max": 0.0015722766693215817, "clip_ratio/high_mean": 0.0006616430255235173, "clip_ratio/low_mean": 0.0005117030214023544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001173346048744861, "epoch": 10.177259475218658, "grad_norm": 0.14947324991226196, "learning_rate": 7.5e-07, "loss": 0.0031, "step": 979 }, { "clip_ratio/high_max": 0.0017708120576571673, "clip_ratio/high_mean": 0.0006873639340483351, "clip_ratio/low_mean": 0.0005619841376756085, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012493480971897952, "epoch": 10.186588921282798, "grad_norm": 0.1742767095565796, "learning_rate": 7.5e-07, "loss": 0.0292, "step": 980 }, { "clip_ratio/high_max": 0.0019446479273028672, "clip_ratio/high_mean": 0.0007938771268527489, "clip_ratio/low_mean": 0.000461813828223967, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00125569094234379, "epoch": 10.19591836734694, "grad_norm": 4754.72021484375, "learning_rate": 7.5e-07, "loss": 0.0361, "step": 981 }, { "clip_ratio/high_max": 0.0019893186836270615, "clip_ratio/high_mean": 0.0007208511506178183, "clip_ratio/low_mean": 0.0004042778641633049, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011251290306972805, "epoch": 10.205247813411079, "grad_norm": 0.16699397563934326, "learning_rate": 7.5e-07, "loss": -0.0447, "step": 982 }, { "clip_ratio/high_max": 0.001916214998345822, "clip_ratio/high_mean": 0.0007380194401775952, "clip_ratio/low_mean": 0.00044226352360965393, "clip_ratio/low_min": 1.5056612937769387e-05, "clip_ratio/region_mean": 0.0011802829430962447, "epoch": 10.214577259475218, "grad_norm": 0.170072540640831, "learning_rate": 7.5e-07, "loss": -0.0571, "step": 983 }, { "clip_ratio/high_max": 0.001889922386908438, "clip_ratio/high_mean": 0.00079181301407516, "clip_ratio/low_mean": 0.00058292555877415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013747385928581934, "epoch": 10.223906705539358, "grad_norm": 0.16093337535858154, "learning_rate": 7.5e-07, "loss": -0.0494, "step": 984 }, { "clip_ratio/high_max": 0.001878786955785472, "clip_ratio/high_mean": 0.0007598587671964196, "clip_ratio/low_mean": 0.00047677633574494394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001236635089298943, "epoch": 10.2332361516035, "grad_norm": 0.16168569028377533, "learning_rate": 7.5e-07, "loss": -0.0529, "step": 985 }, { "clip_ratio/high_max": 0.0023514232743764296, "clip_ratio/high_mean": 0.0008351171945832903, "clip_ratio/low_mean": 0.0006149881482997444, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014501053228741512, "epoch": 10.242565597667639, "grad_norm": 0.18956375122070312, "learning_rate": 7.5e-07, "loss": 0.0034, "step": 986 }, { "clip_ratio/high_max": 0.002503912772226613, "clip_ratio/high_mean": 0.0008611267257947475, "clip_ratio/low_mean": 0.0005510949295057799, "clip_ratio/low_min": 1.2902560229122173e-05, "clip_ratio/region_mean": 0.0014122216671239585, "epoch": 10.251895043731778, "grad_norm": 0.165995255112648, "learning_rate": 7.5e-07, "loss": -0.0322, "step": 987 }, { "clip_ratio/high_max": 0.0020468187649385072, "clip_ratio/high_mean": 0.0008722833044885192, "clip_ratio/low_mean": 0.0005151451960045961, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013874285250494722, "epoch": 10.261224489795918, "grad_norm": 0.21341075003147125, "learning_rate": 7.5e-07, "loss": -0.0317, "step": 988 }, { "clip_ratio/high_max": 0.0018791932525346056, "clip_ratio/high_mean": 0.000733352424504119, "clip_ratio/low_mean": 0.0005676259920619486, "clip_ratio/low_min": 2.6875299226958305e-05, "clip_ratio/region_mean": 0.0013009783906454686, "epoch": 10.270553935860057, "grad_norm": 0.20393885672092438, "learning_rate": 7.5e-07, "loss": -0.0434, "step": 989 }, { "clip_ratio/high_max": 0.002096100914059207, "clip_ratio/high_mean": 0.0007750797649350716, "clip_ratio/low_mean": 0.0006070634281059029, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001382143182127038, "epoch": 10.279883381924199, "grad_norm": 0.15591152012348175, "learning_rate": 7.5e-07, "loss": -0.0434, "step": 990 }, { "clip_ratio/high_max": 0.0021168336097616702, "clip_ratio/high_mean": 0.0007781881222399534, "clip_ratio/low_mean": 0.0006208370168678812, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013990251391078345, "epoch": 10.289212827988338, "grad_norm": 0.16634167730808258, "learning_rate": 7.5e-07, "loss": -0.0133, "step": 991 }, { "clip_ratio/high_max": 0.002369063873629784, "clip_ratio/high_mean": 0.0009913682442856953, "clip_ratio/low_mean": 0.0005980406886010314, "clip_ratio/low_min": 3.3958163839997724e-05, "clip_ratio/region_mean": 0.0015894089156063274, "epoch": 10.298542274052478, "grad_norm": 0.18935547769069672, "learning_rate": 7.5e-07, "loss": -0.0469, "step": 992 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0756138392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4082.0, "completions/mean_length": 828.9551391601562, "completions/mean_terminated_length": 561.7141723632812, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 10.307871720116617, "grad_norm": 0.15046265721321106, "learning_rate": 7.5e-07, "loss": -0.0267, "num_tokens": 585173328.0, "reward": 0.656808078289032, "reward_std": 0.14057818055152893, "rewards/simpleverify_reward/mean": 0.6568080186843872, "rewards/simpleverify_reward/std": 0.4747914671897888, "step": 993 }, { "clip_ratio/high_max": 0.001520193171018036, "clip_ratio/high_mean": 0.0005750897935286048, "clip_ratio/low_mean": 0.0004643500469683204, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010394398632342927, "epoch": 10.317201166180759, "grad_norm": 0.14093178510665894, "learning_rate": 7.5e-07, "loss": 0.0117, "step": 994 }, { "clip_ratio/high_max": 0.0015577074736938812, "clip_ratio/high_mean": 0.0005835704614582937, "clip_ratio/low_mean": 0.0003927444176952122, "clip_ratio/low_min": 1.7433751054340973e-05, "clip_ratio/region_mean": 0.000976314906438347, "epoch": 10.326530612244898, "grad_norm": 0.14680549502372742, "learning_rate": 7.5e-07, "loss": -0.0166, "step": 995 }, { "clip_ratio/high_max": 0.0016895450971787795, "clip_ratio/high_mean": 0.0006919495781403384, "clip_ratio/low_mean": 0.0003389651878933364, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010309148092346732, "epoch": 10.335860058309038, "grad_norm": 0.17604698240756989, "learning_rate": 7.5e-07, "loss": -0.0338, "step": 996 }, { "clip_ratio/high_max": 0.0018716214326559566, "clip_ratio/high_mean": 0.0006861498059151927, "clip_ratio/low_mean": 0.0004882852463197196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011744350613298593, "epoch": 10.345189504373177, "grad_norm": 0.15988214313983917, "learning_rate": 7.5e-07, "loss": -0.0357, "step": 997 }, { "clip_ratio/high_max": 0.0019156088965246454, "clip_ratio/high_mean": 0.0007409749869111693, "clip_ratio/low_mean": 0.00042085789482371183, "clip_ratio/low_min": 1.1107162208645605e-05, "clip_ratio/region_mean": 0.0011618329008342698, "epoch": 10.354518950437317, "grad_norm": 0.14632195234298706, "learning_rate": 7.5e-07, "loss": -0.0184, "step": 998 }, { "clip_ratio/high_max": 0.0018694493046496063, "clip_ratio/high_mean": 0.0007118720877770102, "clip_ratio/low_mean": 0.0005283356967993313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001240207755472511, "epoch": 10.363848396501458, "grad_norm": 0.17512989044189453, "learning_rate": 7.5e-07, "loss": -0.0275, "step": 999 }, { "clip_ratio/high_max": 0.0020950971957063302, "clip_ratio/high_mean": 0.0007345915146288462, "clip_ratio/low_mean": 0.0005415263271970616, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012761178477376234, "epoch": 10.373177842565598, "grad_norm": 0.1792673021554947, "learning_rate": 7.5e-07, "loss": -0.0456, "step": 1000 }, { "clip_ratio/high_max": 0.002085058698867215, "clip_ratio/high_mean": 0.0008191351521418255, "clip_ratio/low_mean": 0.0004952104254698497, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013143455507815816, "epoch": 10.382507288629737, "grad_norm": 0.17341819405555725, "learning_rate": 7.5e-07, "loss": -0.0618, "step": 1001 }, { "clip_ratio/high_max": 0.002187274742027512, "clip_ratio/high_mean": 0.0009033346050273394, "clip_ratio/low_mean": 0.0005730096709157806, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014763442741241306, "epoch": 10.391836734693877, "grad_norm": 0.19216933846473694, "learning_rate": 7.5e-07, "loss": -0.0398, "step": 1002 }, { "clip_ratio/high_max": 0.002000523774768226, "clip_ratio/high_mean": 0.0006770057389076101, "clip_ratio/low_mean": 0.0004933771451760549, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011703828640747815, "epoch": 10.401166180758018, "grad_norm": 0.18383261561393738, "learning_rate": 7.5e-07, "loss": -0.0031, "step": 1003 }, { "clip_ratio/high_max": 0.0019544190072338097, "clip_ratio/high_mean": 0.0006280423958742176, "clip_ratio/low_mean": 0.0005583231641139719, "clip_ratio/low_min": 1.5879064449109137e-05, "clip_ratio/region_mean": 0.0011863655745401047, "epoch": 10.410495626822158, "grad_norm": 0.15856613218784332, "learning_rate": 7.5e-07, "loss": -0.0119, "step": 1004 }, { "clip_ratio/high_max": 0.0018170406328863464, "clip_ratio/high_mean": 0.0007102571676114167, "clip_ratio/low_mean": 0.000589154797125957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012994119824725203, "epoch": 10.419825072886297, "grad_norm": 0.18972980976104736, "learning_rate": 7.5e-07, "loss": -0.0483, "step": 1005 }, { "clip_ratio/high_max": 0.0024534263066016138, "clip_ratio/high_mean": 0.0008996664901133045, "clip_ratio/low_mean": 0.0005496131479958422, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014492796690319665, "epoch": 10.429154518950437, "grad_norm": 0.1626349240541458, "learning_rate": 7.5e-07, "loss": -0.0425, "step": 1006 }, { "clip_ratio/high_max": 0.0018537559881224297, "clip_ratio/high_mean": 0.0007918584888102487, "clip_ratio/low_mean": 0.0005865130688107456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013783715512545314, "epoch": 10.438483965014576, "grad_norm": 0.23469623923301697, "learning_rate": 7.5e-07, "loss": -0.0344, "step": 1007 }, { "clip_ratio/high_max": 0.0018188049980381038, "clip_ratio/high_mean": 0.0006948776190256467, "clip_ratio/low_mean": 0.0006030180720699718, "clip_ratio/low_min": 1.7536476661916822e-05, "clip_ratio/region_mean": 0.0012978956729057245, "epoch": 10.447813411078718, "grad_norm": 0.15437249839305878, "learning_rate": 7.5e-07, "loss": -0.0235, "step": 1008 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0725446428571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 3889.0, "completions/mean_length": 808.5235595703125, "completions/mean_terminated_length": 551.38037109375, "completions/min_length": 126.0, "completions/min_terminated_length": 126.0, "epoch": 10.457142857142857, "grad_norm": 0.1712609976530075, "learning_rate": 7.5e-07, "loss": -0.0098, "num_tokens": 593948713.0, "reward": 0.657784640789032, "reward_std": 0.13935622572898865, "rewards/simpleverify_reward/mean": 0.6577845811843872, "rewards/simpleverify_reward/std": 0.47446781396865845, "step": 1009 }, { "clip_ratio/high_max": 0.001575219830556307, "clip_ratio/high_mean": 0.0005943403220953769, "clip_ratio/low_mean": 0.0003991062694694847, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000993446599750314, "epoch": 10.466472303206997, "grad_norm": 0.1575571894645691, "learning_rate": 7.5e-07, "loss": -0.0044, "step": 1010 }, { "clip_ratio/high_max": 0.0014924662609701045, "clip_ratio/high_mean": 0.0006000772318657255, "clip_ratio/low_mean": 0.00043167758667550515, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010317547803424532, "epoch": 10.475801749271136, "grad_norm": 0.17375950515270233, "learning_rate": 7.5e-07, "loss": -0.0008, "step": 1011 }, { "clip_ratio/high_max": 0.0016259059157164302, "clip_ratio/high_mean": 0.0007191824515757617, "clip_ratio/low_mean": 0.0004434649231370713, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011626473788055591, "epoch": 10.485131195335278, "grad_norm": 0.16739006340503693, "learning_rate": 7.5e-07, "loss": -0.0304, "step": 1012 }, { "clip_ratio/high_max": 0.0018819572833308484, "clip_ratio/high_mean": 0.0007163993286667392, "clip_ratio/low_mean": 0.00043550640793910134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011519057297846302, "epoch": 10.494460641399417, "grad_norm": 0.1779203861951828, "learning_rate": 7.5e-07, "loss": -0.0049, "step": 1013 }, { "clip_ratio/high_max": 0.0018260209872096311, "clip_ratio/high_mean": 0.0006438095188059378, "clip_ratio/low_mean": 0.0005049105657235486, "clip_ratio/low_min": 1.7970098269870505e-05, "clip_ratio/region_mean": 0.0011487200863484759, "epoch": 10.503790087463557, "grad_norm": 0.1553099900484085, "learning_rate": 7.5e-07, "loss": -0.0437, "step": 1014 }, { "clip_ratio/high_max": 0.001713368223136058, "clip_ratio/high_mean": 0.0006778917249903316, "clip_ratio/low_mean": 0.0004422063266247278, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011200980716239428, "epoch": 10.513119533527696, "grad_norm": 0.17738322913646698, "learning_rate": 7.5e-07, "loss": -0.0224, "step": 1015 }, { "clip_ratio/high_max": 0.0022794718024670146, "clip_ratio/high_mean": 0.0008221336711358163, "clip_ratio/low_mean": 0.00046112203472148394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012832557149522472, "epoch": 10.522448979591836, "grad_norm": 0.14491406083106995, "learning_rate": 7.5e-07, "loss": -0.0583, "step": 1016 }, { "clip_ratio/high_max": 0.0016075826752057765, "clip_ratio/high_mean": 0.0006199282142915763, "clip_ratio/low_mean": 0.0004784105299222574, "clip_ratio/low_min": 2.313957884325646e-05, "clip_ratio/region_mean": 0.0010983387182932347, "epoch": 10.531778425655977, "grad_norm": 0.14955724775791168, "learning_rate": 7.5e-07, "loss": -0.0025, "step": 1017 }, { "clip_ratio/high_max": 0.0018046355344267795, "clip_ratio/high_mean": 0.0007125942943275732, "clip_ratio/low_mean": 0.0005000754945285735, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012126698056817986, "epoch": 10.541107871720117, "grad_norm": 0.1690232753753662, "learning_rate": 7.5e-07, "loss": -0.0509, "step": 1018 }, { "clip_ratio/high_max": 0.0017176535548060201, "clip_ratio/high_mean": 0.000655751458452869, "clip_ratio/low_mean": 0.000522065990480769, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011778174484788906, "epoch": 10.550437317784256, "grad_norm": 0.1710786372423172, "learning_rate": 7.5e-07, "loss": 0.0081, "step": 1019 }, { "clip_ratio/high_max": 0.0024136268330039456, "clip_ratio/high_mean": 0.0007905744732852327, "clip_ratio/low_mean": 0.0005074686787338578, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012980431420146488, "epoch": 10.559766763848396, "grad_norm": 0.14702607691287994, "learning_rate": 7.5e-07, "loss": -0.0383, "step": 1020 }, { "clip_ratio/high_max": 0.001984505455766339, "clip_ratio/high_mean": 0.0007550536356575321, "clip_ratio/low_mean": 0.00048166077431233134, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00123671438996098, "epoch": 10.569096209912537, "grad_norm": 0.490422785282135, "learning_rate": 7.5e-07, "loss": -0.0248, "step": 1021 }, { "clip_ratio/high_max": 0.0018466023939254228, "clip_ratio/high_mean": 0.0007525923483626684, "clip_ratio/low_mean": 0.0005314655445545213, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012840579001931474, "epoch": 10.578425655976677, "grad_norm": 0.1615360826253891, "learning_rate": 7.5e-07, "loss": -0.033, "step": 1022 }, { "clip_ratio/high_max": 0.0016221290243265685, "clip_ratio/high_mean": 0.0007478583393094596, "clip_ratio/low_mean": 0.0005390829801399377, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012869413330918178, "epoch": 10.587755102040816, "grad_norm": 0.15261238813400269, "learning_rate": 7.5e-07, "loss": -0.0359, "step": 1023 }, { "clip_ratio/high_max": 0.002176244117435999, "clip_ratio/high_mean": 0.0008734364691918017, "clip_ratio/low_mean": 0.00047866384466033196, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013521003202185966, "epoch": 10.597084548104956, "grad_norm": 0.1291610449552536, "learning_rate": 7.5e-07, "loss": -0.0862, "step": 1024 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.07373046875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4004.0, "completions/mean_length": 822.9450073242188, "completions/mean_terminated_length": 562.411865234375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 10.606413994169095, "grad_norm": 0.18045377731323242, "learning_rate": 7.5e-07, "loss": 0.0061, "num_tokens": 602844348.0, "reward": 0.6595982313156128, "reward_std": 0.14644524455070496, "rewards/simpleverify_reward/mean": 0.6595982313156128, "rewards/simpleverify_reward/std": 0.47386083006858826, "step": 1025 }, { "clip_ratio/high_max": 0.001807376742362976, "clip_ratio/high_mean": 0.0007138318851502845, "clip_ratio/low_mean": 0.0004179842728717631, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011318161632516421, "epoch": 10.615743440233237, "grad_norm": 0.15715162456035614, "learning_rate": 7.5e-07, "loss": -0.0507, "step": 1026 }, { "clip_ratio/high_max": 0.0017808724005590193, "clip_ratio/high_mean": 0.0007010268259364238, "clip_ratio/low_mean": 0.00047141446975729195, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001172441290691495, "epoch": 10.625072886297376, "grad_norm": 0.19799862802028656, "learning_rate": 7.5e-07, "loss": -0.0417, "step": 1027 }, { "clip_ratio/high_max": 0.0017953524948097765, "clip_ratio/high_mean": 0.0006999482093306142, "clip_ratio/low_mean": 0.00039871631906862604, "clip_ratio/low_min": 2.734631380008068e-05, "clip_ratio/region_mean": 0.0010986645393131766, "epoch": 10.634402332361516, "grad_norm": 0.16877512633800507, "learning_rate": 7.5e-07, "loss": -0.0139, "step": 1028 }, { "clip_ratio/high_max": 0.0015241431101458147, "clip_ratio/high_mean": 0.0006263241848500911, "clip_ratio/low_mean": 0.0004854099488511565, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011117341382487211, "epoch": 10.643731778425655, "grad_norm": 0.15509361028671265, "learning_rate": 7.5e-07, "loss": -0.0288, "step": 1029 }, { "clip_ratio/high_max": 0.001872280154202599, "clip_ratio/high_mean": 0.0006925768466317095, "clip_ratio/low_mean": 0.0005233689321357815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012159457837697119, "epoch": 10.653061224489797, "grad_norm": 0.15556122362613678, "learning_rate": 7.5e-07, "loss": -0.0392, "step": 1030 }, { "clip_ratio/high_max": 0.001977934625756461, "clip_ratio/high_mean": 0.0006898925948917167, "clip_ratio/low_mean": 0.0004153118120484578, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011052044173993636, "epoch": 10.662390670553936, "grad_norm": 0.19876763224601746, "learning_rate": 7.5e-07, "loss": -0.0191, "step": 1031 }, { "clip_ratio/high_max": 0.0016955022038018797, "clip_ratio/high_mean": 0.0005993994482196285, "clip_ratio/low_mean": 0.00046560937880713027, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010650088515831158, "epoch": 10.671720116618076, "grad_norm": 0.14248724281787872, "learning_rate": 7.5e-07, "loss": -0.0481, "step": 1032 }, { "clip_ratio/high_max": 0.0019936932367272675, "clip_ratio/high_mean": 0.0008255466309492476, "clip_ratio/low_mean": 0.0004925487201035139, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001318095386523055, "epoch": 10.681049562682215, "grad_norm": 0.14940743148326874, "learning_rate": 7.5e-07, "loss": -0.0704, "step": 1033 }, { "clip_ratio/high_max": 0.002076965000014752, "clip_ratio/high_mean": 0.0007288965571206063, "clip_ratio/low_mean": 0.0006002419568176265, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001329138543951558, "epoch": 10.690379008746355, "grad_norm": 0.16334375739097595, "learning_rate": 7.5e-07, "loss": -0.0185, "step": 1034 }, { "clip_ratio/high_max": 0.0018806903681252152, "clip_ratio/high_mean": 0.000685962128045503, "clip_ratio/low_mean": 0.0004741820330309565, "clip_ratio/low_min": 2.0498524463619106e-05, "clip_ratio/region_mean": 0.0011601441619859543, "epoch": 10.699708454810496, "grad_norm": 0.15224264562129974, "learning_rate": 7.5e-07, "loss": -0.066, "step": 1035 }, { "clip_ratio/high_max": 0.0019941230857511982, "clip_ratio/high_mean": 0.0008142519072862342, "clip_ratio/low_mean": 0.0006262071387936885, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014404590201593237, "epoch": 10.709037900874636, "grad_norm": 0.1702098548412323, "learning_rate": 7.5e-07, "loss": -0.0212, "step": 1036 }, { "clip_ratio/high_max": 0.0018144278910767753, "clip_ratio/high_mean": 0.0007186246439232491, "clip_ratio/low_mean": 0.0005756945029133931, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012943191650265362, "epoch": 10.718367346938775, "grad_norm": 4.509218692779541, "learning_rate": 7.5e-07, "loss": 0.0152, "step": 1037 }, { "clip_ratio/high_max": 0.0016950013559835497, "clip_ratio/high_mean": 0.0006672124172837357, "clip_ratio/low_mean": 0.0007318546340684406, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001399067015881883, "epoch": 10.727696793002915, "grad_norm": 0.18219859898090363, "learning_rate": 7.5e-07, "loss": 0.0219, "step": 1038 }, { "clip_ratio/high_max": 0.0022781737498007715, "clip_ratio/high_mean": 0.0008850893991620978, "clip_ratio/low_mean": 0.0006730001978212385, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015580895669700112, "epoch": 10.737026239067056, "grad_norm": 0.1742575317621231, "learning_rate": 7.5e-07, "loss": -0.0552, "step": 1039 }, { "clip_ratio/high_max": 0.0020943154449923895, "clip_ratio/high_mean": 0.0007800316725479206, "clip_ratio/low_mean": 0.0007661519939574646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015461836483154912, "epoch": 10.746355685131196, "grad_norm": 0.20221158862113953, "learning_rate": 7.5e-07, "loss": 0.0018, "step": 1040 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0817522321428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 843.7453002929688, "completions/mean_terminated_length": 554.1947631835938, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 10.755685131195335, "grad_norm": 0.17919409275054932, "learning_rate": 7.5e-07, "loss": -0.0074, "num_tokens": 611564832.0, "reward": 0.662667453289032, "reward_std": 0.14915898442268372, "rewards/simpleverify_reward/mean": 0.6626673936843872, "rewards/simpleverify_reward/std": 0.47281593084335327, "step": 1041 }, { "clip_ratio/high_max": 0.0017612973824725486, "clip_ratio/high_mean": 0.0007816302440915024, "clip_ratio/low_mean": 0.00040829759200278204, "clip_ratio/low_min": 1.2322555448918138e-05, "clip_ratio/region_mean": 0.0011899278506461997, "epoch": 10.765014577259475, "grad_norm": 0.19473609328269958, "learning_rate": 7.5e-07, "loss": -0.0724, "step": 1042 }, { "clip_ratio/high_max": 0.001491226259531686, "clip_ratio/high_mean": 0.0005778819304396166, "clip_ratio/low_mean": 0.0004466062891879119, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001024488208713592, "epoch": 10.774344023323614, "grad_norm": 0.15037822723388672, "learning_rate": 7.5e-07, "loss": -0.0393, "step": 1043 }, { "clip_ratio/high_max": 0.0016656623265589587, "clip_ratio/high_mean": 0.0006393973271769937, "clip_ratio/low_mean": 0.00042237111392751103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010617684438329889, "epoch": 10.783673469387756, "grad_norm": 0.17169658839702606, "learning_rate": 7.5e-07, "loss": -0.0551, "step": 1044 }, { "clip_ratio/high_max": 0.001845007143856492, "clip_ratio/high_mean": 0.0006820388261985499, "clip_ratio/low_mean": 0.0004673666308008251, "clip_ratio/low_min": 1.5240185348375235e-05, "clip_ratio/region_mean": 0.0011494053978822194, "epoch": 10.793002915451895, "grad_norm": 0.1718200147151947, "learning_rate": 7.5e-07, "loss": -0.0311, "step": 1045 }, { "clip_ratio/high_max": 0.0017487164659542032, "clip_ratio/high_mean": 0.0006984607662161579, "clip_ratio/low_mean": 0.000494446015181893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001192906784126535, "epoch": 10.802332361516035, "grad_norm": 0.18627245724201202, "learning_rate": 7.5e-07, "loss": 0.0147, "step": 1046 }, { "clip_ratio/high_max": 0.0018676652289286721, "clip_ratio/high_mean": 0.0007404244697681861, "clip_ratio/low_mean": 0.00040297763553098775, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011434021253080573, "epoch": 10.811661807580174, "grad_norm": 0.16078606247901917, "learning_rate": 7.5e-07, "loss": -0.0353, "step": 1047 }, { "clip_ratio/high_max": 0.0017141080170404166, "clip_ratio/high_mean": 0.0007585975890833652, "clip_ratio/low_mean": 0.000517728863087541, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012763264930981677, "epoch": 10.820991253644316, "grad_norm": 0.6979455947875977, "learning_rate": 7.5e-07, "loss": -0.0178, "step": 1048 }, { "clip_ratio/high_max": 0.0017546399831189774, "clip_ratio/high_mean": 0.0006824590200267266, "clip_ratio/low_mean": 0.0004719545240732259, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011544135340955108, "epoch": 10.830320699708455, "grad_norm": 0.1783263236284256, "learning_rate": 7.5e-07, "loss": -0.027, "step": 1049 }, { "clip_ratio/high_max": 0.001850803480920149, "clip_ratio/high_mean": 0.0007268584899975394, "clip_ratio/low_mean": 0.0006075695000617998, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013344279905140866, "epoch": 10.839650145772595, "grad_norm": 0.19904324412345886, "learning_rate": 7.5e-07, "loss": -0.0245, "step": 1050 }, { "clip_ratio/high_max": 0.0021272301237331703, "clip_ratio/high_mean": 0.0008310053735840484, "clip_ratio/low_mean": 0.0005401404023359646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00137114578501496, "epoch": 10.848979591836734, "grad_norm": 0.1609840989112854, "learning_rate": 7.5e-07, "loss": -0.0226, "step": 1051 }, { "clip_ratio/high_max": 0.0025743976948433556, "clip_ratio/high_mean": 0.0009049622949532932, "clip_ratio/low_mean": 0.00044860419620817993, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013535664656956214, "epoch": 10.858309037900874, "grad_norm": 0.27624768018722534, "learning_rate": 7.5e-07, "loss": -0.0351, "step": 1052 }, { "clip_ratio/high_max": 0.0020464279950829223, "clip_ratio/high_mean": 0.0008730376612220425, "clip_ratio/low_mean": 0.0005235824291958124, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013966200531285722, "epoch": 10.867638483965015, "grad_norm": 0.1589779555797577, "learning_rate": 7.5e-07, "loss": -0.0559, "step": 1053 }, { "clip_ratio/high_max": 0.002182643405831186, "clip_ratio/high_mean": 0.0009643503199185943, "clip_ratio/low_mean": 0.0006626094771036151, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001626959812710993, "epoch": 10.876967930029155, "grad_norm": 0.1466217041015625, "learning_rate": 7.5e-07, "loss": -0.051, "step": 1054 }, { "clip_ratio/high_max": 0.002466208767145872, "clip_ratio/high_mean": 0.001022666932840366, "clip_ratio/low_mean": 0.0005001501488095528, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001522817063232651, "epoch": 10.886297376093294, "grad_norm": 0.1965305507183075, "learning_rate": 7.5e-07, "loss": -0.0912, "step": 1055 }, { "clip_ratio/high_max": 0.0022782884261687286, "clip_ratio/high_mean": 0.0008708604109415319, "clip_ratio/low_mean": 0.0005510707660505432, "clip_ratio/low_min": 1.7313019270659424e-05, "clip_ratio/region_mean": 0.0014219311451597605, "epoch": 10.895626822157434, "grad_norm": 0.16084864735603333, "learning_rate": 7.5e-07, "loss": -0.0086, "step": 1056 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08935546875, "completions/max_length": 4096.0, "completions/max_terminated_length": 4062.0, "completions/mean_length": 887.2882690429688, "completions/mean_terminated_length": 572.4387817382812, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 11.00932944606414, "grad_norm": 0.1776704639196396, "learning_rate": 7.5e-07, "loss": -0.0371, "num_tokens": 620474300.0, "reward": 0.640206515789032, "reward_std": 0.14993953704833984, "rewards/simpleverify_reward/mean": 0.6402064561843872, "rewards/simpleverify_reward/std": 0.4799564778804779, "step": 1057 }, { "clip_ratio/high_max": 0.001976400712010218, "clip_ratio/high_mean": 0.0007371092169705662, "clip_ratio/low_mean": 0.00042570448022161145, "clip_ratio/low_min": 2.0187339032418095e-05, "clip_ratio/region_mean": 0.0011628137035586406, "epoch": 11.018658892128279, "grad_norm": 0.16225765645503998, "learning_rate": 7.5e-07, "loss": -0.0319, "step": 1058 }, { "clip_ratio/high_max": 0.0016837114017107524, "clip_ratio/high_mean": 0.0006649296556133777, "clip_ratio/low_mean": 0.0004431778640991979, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00110810750265955, "epoch": 11.02798833819242, "grad_norm": 0.1509750336408615, "learning_rate": 7.5e-07, "loss": -0.0406, "step": 1059 }, { "clip_ratio/high_max": 0.0020541488665912766, "clip_ratio/high_mean": 0.0007369838676822837, "clip_ratio/low_mean": 0.0004280640478100395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011650479355012067, "epoch": 11.03731778425656, "grad_norm": 0.16393734514713287, "learning_rate": 7.5e-07, "loss": -0.0819, "step": 1060 }, { "clip_ratio/high_max": 0.001711178931145696, "clip_ratio/high_mean": 0.0006311000652203802, "clip_ratio/low_mean": 0.0004814418562091305, "clip_ratio/low_min": 8.523114956915379e-06, "clip_ratio/region_mean": 0.0011125419441668782, "epoch": 11.0466472303207, "grad_norm": 0.1700831949710846, "learning_rate": 7.5e-07, "loss": -0.0341, "step": 1061 }, { "clip_ratio/high_max": 0.0017828345007728785, "clip_ratio/high_mean": 0.0006789653380110394, "clip_ratio/low_mean": 0.0005256495360299596, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012046148731315043, "epoch": 11.055976676384839, "grad_norm": 0.15225249528884888, "learning_rate": 7.5e-07, "loss": -0.0394, "step": 1062 }, { "clip_ratio/high_max": 0.0015376641240436584, "clip_ratio/high_mean": 0.0006354024080792442, "clip_ratio/low_mean": 0.0005400489390012808, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011754513434425462, "epoch": 11.06530612244898, "grad_norm": 0.1542481780052185, "learning_rate": 7.5e-07, "loss": -0.0301, "step": 1063 }, { "clip_ratio/high_max": 0.0018353463528910652, "clip_ratio/high_mean": 0.0007388581416307716, "clip_ratio/low_mean": 0.0006085171335143968, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013473752514983062, "epoch": 11.07463556851312, "grad_norm": 0.17591692507266998, "learning_rate": 7.5e-07, "loss": -0.0551, "step": 1064 }, { "clip_ratio/high_max": 0.001774948243109975, "clip_ratio/high_mean": 0.0007027006231510313, "clip_ratio/low_mean": 0.0005057454618508928, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001208446094096871, "epoch": 11.08396501457726, "grad_norm": 0.15264727175235748, "learning_rate": 7.5e-07, "loss": -0.0266, "step": 1065 }, { "clip_ratio/high_max": 0.002188687700254377, "clip_ratio/high_mean": 0.0008342844303115271, "clip_ratio/low_mean": 0.0005342435833881609, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013685281010111794, "epoch": 11.093294460641399, "grad_norm": 0.18811286985874176, "learning_rate": 7.5e-07, "loss": -0.0404, "step": 1066 }, { "clip_ratio/high_max": 0.0018288026367372368, "clip_ratio/high_mean": 0.0007668013295187848, "clip_ratio/low_mean": 0.0005715774495911319, "clip_ratio/low_min": 2.8242204280104488e-05, "clip_ratio/region_mean": 0.0013383788100327365, "epoch": 11.102623906705539, "grad_norm": 2.0074031352996826, "learning_rate": 7.5e-07, "loss": -0.0336, "step": 1067 }, { "clip_ratio/high_max": 0.0019361472732271068, "clip_ratio/high_mean": 0.0007925995887489989, "clip_ratio/low_mean": 0.000492899432174454, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012854990309278946, "epoch": 11.11195335276968, "grad_norm": 0.15601854026317596, "learning_rate": 7.5e-07, "loss": -0.0682, "step": 1068 }, { "clip_ratio/high_max": 0.0016864041208464187, "clip_ratio/high_mean": 0.0006628752444157726, "clip_ratio/low_mean": 0.0006052958797226893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012681710977631155, "epoch": 11.12128279883382, "grad_norm": 0.16562223434448242, "learning_rate": 7.5e-07, "loss": -0.021, "step": 1069 }, { "clip_ratio/high_max": 0.001993170146306511, "clip_ratio/high_mean": 0.0007986752279975917, "clip_ratio/low_mean": 0.0006251421509659849, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001423817371687619, "epoch": 11.130612244897959, "grad_norm": 0.15887698531150818, "learning_rate": 7.5e-07, "loss": -0.0354, "step": 1070 }, { "clip_ratio/high_max": 0.002294181464094436, "clip_ratio/high_mean": 0.000902523683180334, "clip_ratio/low_mean": 0.000652005008305423, "clip_ratio/low_min": 1.4534884030581452e-05, "clip_ratio/region_mean": 0.0015545287023996934, "epoch": 11.139941690962099, "grad_norm": 0.16996821761131287, "learning_rate": 7.5e-07, "loss": -0.0278, "step": 1071 }, { "clip_ratio/high_max": 0.001801777983928332, "clip_ratio/high_mean": 0.0007411760470859008, "clip_ratio/low_mean": 0.0006601452005270403, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014013212457939517, "epoch": 11.14927113702624, "grad_norm": 0.3286857306957245, "learning_rate": 7.5e-07, "loss": -0.0263, "step": 1072 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0791713169642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4081.0, "completions/mean_length": 828.5430908203125, "completions/mean_terminated_length": 547.612548828125, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 11.15860058309038, "grad_norm": 0.1632704734802246, "learning_rate": 7.5e-07, "loss": -0.0111, "num_tokens": 629147181.0, "reward": 0.664132297039032, "reward_std": 0.14546556770801544, "rewards/simpleverify_reward/mean": 0.6641322374343872, "rewards/simpleverify_reward/std": 0.4723093807697296, "step": 1073 }, { "clip_ratio/high_max": 0.0020725514768855646, "clip_ratio/high_mean": 0.000766202263548621, "clip_ratio/low_mean": 0.0003120598898931348, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010782621466205455, "epoch": 11.167930029154519, "grad_norm": 0.2192382663488388, "learning_rate": 7.5e-07, "loss": -0.0385, "step": 1074 }, { "clip_ratio/high_max": 0.0017208330682478845, "clip_ratio/high_mean": 0.0007264216692419723, "clip_ratio/low_mean": 0.00031339247198047815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001039814182149712, "epoch": 11.177259475218658, "grad_norm": 0.16927531361579895, "learning_rate": 7.5e-07, "loss": -0.0728, "step": 1075 }, { "clip_ratio/high_max": 0.0018645824311533943, "clip_ratio/high_mean": 0.0007094743396010017, "clip_ratio/low_mean": 0.00046550035949621815, "clip_ratio/low_min": 1.7536476661916822e-05, "clip_ratio/region_mean": 0.001174974695459241, "epoch": 11.186588921282798, "grad_norm": 0.17894114553928375, "learning_rate": 7.5e-07, "loss": -0.0116, "step": 1076 }, { "clip_ratio/high_max": 0.00170163981965743, "clip_ratio/high_mean": 0.0007017468406047556, "clip_ratio/low_mean": 0.0004058074646309251, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011075543188781012, "epoch": 11.19591836734694, "grad_norm": 0.14785364270210266, "learning_rate": 7.5e-07, "loss": -0.0251, "step": 1077 }, { "clip_ratio/high_max": 0.0018989800882991403, "clip_ratio/high_mean": 0.0006727234995196341, "clip_ratio/low_mean": 0.0004220003993395949, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010947238879452925, "epoch": 11.205247813411079, "grad_norm": 0.16059760749340057, "learning_rate": 7.5e-07, "loss": -0.0432, "step": 1078 }, { "clip_ratio/high_max": 0.0015332806833612267, "clip_ratio/high_mean": 0.0007089968603395391, "clip_ratio/low_mean": 0.00044197563647685456, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011509724990901304, "epoch": 11.214577259475218, "grad_norm": 0.18069487810134888, "learning_rate": 7.5e-07, "loss": -0.0848, "step": 1079 }, { "clip_ratio/high_max": 0.0019514098567015026, "clip_ratio/high_mean": 0.0006457512554334244, "clip_ratio/low_mean": 0.00035795798430626746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010037092415586812, "epoch": 11.223906705539358, "grad_norm": 0.1726120114326477, "learning_rate": 7.5e-07, "loss": -0.0251, "step": 1080 }, { "clip_ratio/high_max": 0.0018685032409848645, "clip_ratio/high_mean": 0.0007799353297741618, "clip_ratio/low_mean": 0.0005919533205087646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013718886584683787, "epoch": 11.2332361516035, "grad_norm": 0.20535185933113098, "learning_rate": 7.5e-07, "loss": 0.0194, "step": 1081 }, { "clip_ratio/high_max": 0.0019328380294609815, "clip_ratio/high_mean": 0.0007852431263017934, "clip_ratio/low_mean": 0.0005452785271700122, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013305216198205017, "epoch": 11.242565597667639, "grad_norm": 0.19232702255249023, "learning_rate": 7.5e-07, "loss": -0.0313, "step": 1082 }, { "clip_ratio/high_max": 0.002024543977313442, "clip_ratio/high_mean": 0.0008342610744875856, "clip_ratio/low_mean": 0.0007062746044539381, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015405356753035448, "epoch": 11.251895043731778, "grad_norm": 0.17295430600643158, "learning_rate": 7.5e-07, "loss": -0.0264, "step": 1083 }, { "clip_ratio/high_max": 0.002435208301903913, "clip_ratio/high_mean": 0.0008862450122251175, "clip_ratio/low_mean": 0.00057418121923547, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014604262469219975, "epoch": 11.261224489795918, "grad_norm": 0.19879071414470673, "learning_rate": 7.5e-07, "loss": -0.0502, "step": 1084 }, { "clip_ratio/high_max": 0.0021058396305306815, "clip_ratio/high_mean": 0.0007190374444689951, "clip_ratio/low_mean": 0.0005620482743324828, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012810857151634991, "epoch": 11.270553935860057, "grad_norm": 0.16017907857894897, "learning_rate": 7.5e-07, "loss": -0.0295, "step": 1085 }, { "clip_ratio/high_max": 0.0017237720858247485, "clip_ratio/high_mean": 0.0006449626898756833, "clip_ratio/low_mean": 0.0006933607237442629, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013383234363573138, "epoch": 11.279883381924199, "grad_norm": 0.16931754350662231, "learning_rate": 7.5e-07, "loss": -0.0057, "step": 1086 }, { "clip_ratio/high_max": 0.0019981083532911725, "clip_ratio/high_mean": 0.0008096446272247704, "clip_ratio/low_mean": 0.0006773224704375025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014869671358610503, "epoch": 11.289212827988338, "grad_norm": 0.15948660671710968, "learning_rate": 7.5e-07, "loss": -0.0069, "step": 1087 }, { "clip_ratio/high_max": 0.0018189753864135128, "clip_ratio/high_mean": 0.000787154103818466, "clip_ratio/low_mean": 0.0005908481525693787, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013780022454739083, "epoch": 11.298542274052478, "grad_norm": 0.26068779826164246, "learning_rate": 7.5e-07, "loss": -0.0353, "step": 1088 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0836356026785714, "completions/max_length": 4096.0, "completions/max_terminated_length": 4095.0, "completions/mean_length": 858.3605346679688, "completions/mean_terminated_length": 562.8645629882812, "completions/min_length": 112.0, "completions/min_terminated_length": 112.0, "epoch": 11.307871720116617, "grad_norm": 0.19127778708934784, "learning_rate": 7.5e-07, "loss": -0.0267, "num_tokens": 637948525.0, "reward": 0.6635044813156128, "reward_std": 0.13619165122509003, "rewards/simpleverify_reward/mean": 0.6635044813156128, "rewards/simpleverify_reward/std": 0.47252708673477173, "step": 1089 }, { "clip_ratio/high_max": 0.0015765564239700325, "clip_ratio/high_mean": 0.0005944736039964482, "clip_ratio/low_mean": 0.0005073804522908176, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011018540644727182, "epoch": 11.317201166180759, "grad_norm": 0.17397485673427582, "learning_rate": 7.5e-07, "loss": -0.0089, "step": 1090 }, { "clip_ratio/high_max": 0.0019944887862948235, "clip_ratio/high_mean": 0.0007601568495374522, "clip_ratio/low_mean": 0.00038062198382249335, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011407788115320727, "epoch": 11.326530612244898, "grad_norm": 0.1547970026731491, "learning_rate": 7.5e-07, "loss": -0.0297, "step": 1091 }, { "clip_ratio/high_max": 0.001821688416384859, "clip_ratio/high_mean": 0.0006790928182454081, "clip_ratio/low_mean": 0.0003160942551403423, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009951870815712027, "epoch": 11.335860058309038, "grad_norm": 0.16044758260250092, "learning_rate": 7.5e-07, "loss": -0.0284, "step": 1092 }, { "clip_ratio/high_max": 0.0018704135000007227, "clip_ratio/high_mean": 0.0007360176441579824, "clip_ratio/low_mean": 0.0003996106320300896, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011356282921042293, "epoch": 11.345189504373177, "grad_norm": 0.19764259457588196, "learning_rate": 7.5e-07, "loss": -0.0549, "step": 1093 }, { "clip_ratio/high_max": 0.0019791921149590053, "clip_ratio/high_mean": 0.0007236142737383489, "clip_ratio/low_mean": 0.0004990314428141573, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012226456856296863, "epoch": 11.354518950437317, "grad_norm": 0.15556803345680237, "learning_rate": 7.5e-07, "loss": -0.0034, "step": 1094 }, { "clip_ratio/high_max": 0.0015545001915597823, "clip_ratio/high_mean": 0.0005465127687784843, "clip_ratio/low_mean": 0.0004937723606417421, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010402851075923536, "epoch": 11.363848396501458, "grad_norm": 0.17346014082431793, "learning_rate": 7.5e-07, "loss": 0.0196, "step": 1095 }, { "clip_ratio/high_max": 0.0018282723540323786, "clip_ratio/high_mean": 0.0006476829312305199, "clip_ratio/low_mean": 0.0004956719576512114, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011433548970671836, "epoch": 11.373177842565598, "grad_norm": 0.1442718356847763, "learning_rate": 7.5e-07, "loss": -0.0237, "step": 1096 }, { "clip_ratio/high_max": 0.0021093246177770197, "clip_ratio/high_mean": 0.000763972931963508, "clip_ratio/low_mean": 0.0004648608273782884, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001228833782079164, "epoch": 11.382507288629737, "grad_norm": 0.1649332344532013, "learning_rate": 7.5e-07, "loss": -0.0485, "step": 1097 }, { "clip_ratio/high_max": 0.0018170131697843317, "clip_ratio/high_mean": 0.0007062723079798161, "clip_ratio/low_mean": 0.0005418941436801106, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012481664343795273, "epoch": 11.391836734693877, "grad_norm": 0.16724666953086853, "learning_rate": 7.5e-07, "loss": -0.027, "step": 1098 }, { "clip_ratio/high_max": 0.0019824481460091192, "clip_ratio/high_mean": 0.0006524083382828394, "clip_ratio/low_mean": 0.00046190839202608913, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001114316741222865, "epoch": 11.401166180758018, "grad_norm": 0.15969207882881165, "learning_rate": 7.5e-07, "loss": -0.0092, "step": 1099 }, { "clip_ratio/high_max": 0.0019043331158172805, "clip_ratio/high_mean": 0.0006869520816508157, "clip_ratio/low_mean": 0.00047348963516924414, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011604417250055121, "epoch": 11.410495626822158, "grad_norm": 0.1365962028503418, "learning_rate": 7.5e-07, "loss": -0.0422, "step": 1100 }, { "clip_ratio/high_max": 0.001898563547001686, "clip_ratio/high_mean": 0.0006917038808751386, "clip_ratio/low_mean": 0.0005980461810395354, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012897500964754727, "epoch": 11.419825072886297, "grad_norm": 0.5491074919700623, "learning_rate": 7.5e-07, "loss": -0.0214, "step": 1101 }, { "clip_ratio/high_max": 0.0022070647828513756, "clip_ratio/high_mean": 0.0008179925025615375, "clip_ratio/low_mean": 0.00042304446378693683, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012410369545250433, "epoch": 11.429154518950437, "grad_norm": 0.16649353504180908, "learning_rate": 7.5e-07, "loss": -0.0544, "step": 1102 }, { "clip_ratio/high_max": 0.0022901120319147594, "clip_ratio/high_mean": 0.0009295400814153254, "clip_ratio/low_mean": 0.0005036155625930405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014331556849356275, "epoch": 11.438483965014576, "grad_norm": 0.17595064640045166, "learning_rate": 7.5e-07, "loss": -0.0302, "step": 1103 }, { "clip_ratio/high_max": 0.002076219861919526, "clip_ratio/high_mean": 0.0008283232637040783, "clip_ratio/low_mean": 0.0005527119074031361, "clip_ratio/low_min": 3.121878035017289e-05, "clip_ratio/region_mean": 0.0013810351338179316, "epoch": 11.447813411078718, "grad_norm": 0.14843478798866272, "learning_rate": 7.5e-07, "loss": -0.0607, "step": 1104 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0862165178571429, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 863.1641845703125, "completions/mean_terminated_length": 558.142333984375, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 11.457142857142857, "grad_norm": 0.18060103058815002, "learning_rate": 7.5e-07, "loss": 0.0075, "num_tokens": 646678198.0, "reward": 0.6564592719078064, "reward_std": 0.1437281221151352, "rewards/simpleverify_reward/mean": 0.6564592719078064, "rewards/simpleverify_reward/std": 0.4749065339565277, "step": 1105 }, { "clip_ratio/high_max": 0.0019874668796546757, "clip_ratio/high_mean": 0.0007810907845851034, "clip_ratio/low_mean": 0.0004283192174625583, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012094100457034074, "epoch": 11.466472303206997, "grad_norm": 0.16369828581809998, "learning_rate": 7.5e-07, "loss": -0.0402, "step": 1106 }, { "clip_ratio/high_max": 0.0017929970199475065, "clip_ratio/high_mean": 0.0006679979542241199, "clip_ratio/low_mean": 0.0005340393581718672, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012020372960250825, "epoch": 11.475801749271136, "grad_norm": 0.18708263337612152, "learning_rate": 7.5e-07, "loss": -0.0362, "step": 1107 }, { "clip_ratio/high_max": 0.0018646436765266117, "clip_ratio/high_mean": 0.0007126437012630049, "clip_ratio/low_mean": 0.0004107321583433077, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001123375888710143, "epoch": 11.485131195335278, "grad_norm": 0.33117592334747314, "learning_rate": 7.5e-07, "loss": -0.0602, "step": 1108 }, { "clip_ratio/high_max": 0.0015803513742866926, "clip_ratio/high_mean": 0.0005681064121745294, "clip_ratio/low_mean": 0.00038103366296127206, "clip_ratio/low_min": 2.2818547222414054e-05, "clip_ratio/region_mean": 0.0009491400815022644, "epoch": 11.494460641399417, "grad_norm": 0.14067630469799042, "learning_rate": 7.5e-07, "loss": -0.049, "step": 1109 }, { "clip_ratio/high_max": 0.0016844692181621213, "clip_ratio/high_mean": 0.0007085798351909034, "clip_ratio/low_mean": 0.0005707352047465974, "clip_ratio/low_min": 1.790061651263386e-05, "clip_ratio/region_mean": 0.0012793150235665962, "epoch": 11.503790087463557, "grad_norm": 0.17637979984283447, "learning_rate": 7.5e-07, "loss": -0.0065, "step": 1110 }, { "clip_ratio/high_max": 0.0015496579399041366, "clip_ratio/high_mean": 0.0006525438366224989, "clip_ratio/low_mean": 0.00045320850585994776, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011057523734052666, "epoch": 11.513119533527696, "grad_norm": 0.168073371052742, "learning_rate": 7.5e-07, "loss": -0.0107, "step": 1111 }, { "clip_ratio/high_max": 0.0018678380765777547, "clip_ratio/high_mean": 0.0007333138601097744, "clip_ratio/low_mean": 0.0005459084013637039, "clip_ratio/low_min": 2.709733416850213e-05, "clip_ratio/region_mean": 0.0012792222551070154, "epoch": 11.522448979591836, "grad_norm": 0.19407816231250763, "learning_rate": 7.5e-07, "loss": -0.0555, "step": 1112 }, { "clip_ratio/high_max": 0.0020468409384193365, "clip_ratio/high_mean": 0.0008059043811954325, "clip_ratio/low_mean": 0.0005124767094457638, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013183810951886699, "epoch": 11.531778425655977, "grad_norm": 0.15530559420585632, "learning_rate": 7.5e-07, "loss": -0.0414, "step": 1113 }, { "clip_ratio/high_max": 0.002017160408286145, "clip_ratio/high_mean": 0.0007692887593293563, "clip_ratio/low_mean": 0.0005120854004871944, "clip_ratio/low_min": 1.9948931367252953e-05, "clip_ratio/region_mean": 0.0012813741486752406, "epoch": 11.541107871720117, "grad_norm": 0.16335617005825043, "learning_rate": 7.5e-07, "loss": -0.0263, "step": 1114 }, { "clip_ratio/high_max": 0.0018668015982257202, "clip_ratio/high_mean": 0.0007465014250556123, "clip_ratio/low_mean": 0.0005245723041298334, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012710737064480782, "epoch": 11.550437317784256, "grad_norm": 0.18553781509399414, "learning_rate": 7.5e-07, "loss": -0.0213, "step": 1115 }, { "clip_ratio/high_max": 0.002253071728773648, "clip_ratio/high_mean": 0.0007549665242549963, "clip_ratio/low_mean": 0.0005313340006978251, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012863005358667579, "epoch": 11.559766763848396, "grad_norm": 0.1607186496257782, "learning_rate": 7.5e-07, "loss": -0.0509, "step": 1116 }, { "clip_ratio/high_max": 0.001704203063127352, "clip_ratio/high_mean": 0.0007426636102536577, "clip_ratio/low_mean": 0.0005505606150109088, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001293224202527199, "epoch": 11.569096209912537, "grad_norm": 0.17640343308448792, "learning_rate": 7.5e-07, "loss": -0.0376, "step": 1117 }, { "clip_ratio/high_max": 0.0018384862669336144, "clip_ratio/high_mean": 0.0007805534878571052, "clip_ratio/low_mean": 0.0005628532671835274, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001343406725936802, "epoch": 11.578425655976677, "grad_norm": 0.17226919531822205, "learning_rate": 7.5e-07, "loss": -0.0512, "step": 1118 }, { "clip_ratio/high_max": 0.0020706493814941496, "clip_ratio/high_mean": 0.0007979483889357653, "clip_ratio/low_mean": 0.00048486178366147215, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012828102189814672, "epoch": 11.587755102040816, "grad_norm": 0.14089573919773102, "learning_rate": 7.5e-07, "loss": -0.0586, "step": 1119 }, { "clip_ratio/high_max": 0.0018377047199464869, "clip_ratio/high_mean": 0.0007057940911181504, "clip_ratio/low_mean": 0.0005013882350795029, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012071823184669483, "epoch": 11.597084548104956, "grad_norm": 4.283782482147217, "learning_rate": 7.5e-07, "loss": -0.0362, "step": 1120 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0917271205357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3983.0, "completions/mean_length": 888.4346923828125, "completions/mean_terminated_length": 564.5003662109375, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 11.606413994169095, "grad_norm": 0.19237948954105377, "learning_rate": 7.5e-07, "loss": -0.0658, "num_tokens": 655472317.0, "reward": 0.6498326063156128, "reward_std": 0.14459168910980225, "rewards/simpleverify_reward/mean": 0.6498326063156128, "rewards/simpleverify_reward/std": 0.4770388603210449, "step": 1121 }, { "clip_ratio/high_max": 0.001484858148614876, "clip_ratio/high_mean": 0.0006882803609187249, "clip_ratio/low_mean": 0.00044735574169862957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011356360992067493, "epoch": 11.615743440233237, "grad_norm": 0.16543763875961304, "learning_rate": 7.5e-07, "loss": -0.0487, "step": 1122 }, { "clip_ratio/high_max": 0.002164991201425437, "clip_ratio/high_mean": 0.0007840806283638813, "clip_ratio/low_mean": 0.00034178482655988773, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011258654412813485, "epoch": 11.625072886297376, "grad_norm": 0.21502916514873505, "learning_rate": 7.5e-07, "loss": -0.0916, "step": 1123 }, { "clip_ratio/high_max": 0.0019361018821655307, "clip_ratio/high_mean": 0.0007385134922515135, "clip_ratio/low_mean": 0.00038284831862256397, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011213617835892364, "epoch": 11.634402332361516, "grad_norm": 0.1463957279920578, "learning_rate": 7.5e-07, "loss": -0.062, "step": 1124 }, { "clip_ratio/high_max": 0.0022932967222004663, "clip_ratio/high_mean": 0.0007330421394726727, "clip_ratio/low_mean": 0.00043078493308712495, "clip_ratio/low_min": 2.8689464670605958e-05, "clip_ratio/region_mean": 0.0011638270771072712, "epoch": 11.643731778425655, "grad_norm": 0.17418134212493896, "learning_rate": 7.5e-07, "loss": -0.0134, "step": 1125 }, { "clip_ratio/high_max": 0.0019647799636004493, "clip_ratio/high_mean": 0.0007078460648699547, "clip_ratio/low_mean": 0.0004999745920031273, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012078206545993453, "epoch": 11.653061224489797, "grad_norm": 0.16218632459640503, "learning_rate": 7.5e-07, "loss": -0.0582, "step": 1126 }, { "clip_ratio/high_max": 0.0017649752480792813, "clip_ratio/high_mean": 0.0006609039273826056, "clip_ratio/low_mean": 0.00042714136361610144, "clip_ratio/low_min": 1.5006002286099829e-05, "clip_ratio/region_mean": 0.0010880453119170852, "epoch": 11.662390670553936, "grad_norm": 0.15910117328166962, "learning_rate": 7.5e-07, "loss": -0.0359, "step": 1127 }, { "clip_ratio/high_max": 0.0019708495019585826, "clip_ratio/high_mean": 0.0007077010923239868, "clip_ratio/low_mean": 0.00042495623620197875, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001132657323978492, "epoch": 11.671720116618076, "grad_norm": 0.161499485373497, "learning_rate": 7.5e-07, "loss": -0.0502, "step": 1128 }, { "clip_ratio/high_max": 0.0017921314247359987, "clip_ratio/high_mean": 0.0006669777994829929, "clip_ratio/low_mean": 0.0006130731489975005, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012800509539374616, "epoch": 11.681049562682215, "grad_norm": 0.1837463527917862, "learning_rate": 7.5e-07, "loss": -0.0287, "step": 1129 }, { "clip_ratio/high_max": 0.001983010704861954, "clip_ratio/high_mean": 0.0008492902998114005, "clip_ratio/low_mean": 0.0006045203726898762, "clip_ratio/low_min": 2.9129931135685183e-05, "clip_ratio/region_mean": 0.0014538106770487502, "epoch": 11.690379008746355, "grad_norm": 0.32431313395500183, "learning_rate": 7.5e-07, "loss": -0.0402, "step": 1130 }, { "clip_ratio/high_max": 0.0022717600095347734, "clip_ratio/high_mean": 0.000813306402960734, "clip_ratio/low_mean": 0.0006983749572100351, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0015116813374334015, "epoch": 11.699708454810496, "grad_norm": 0.23207825422286987, "learning_rate": 7.5e-07, "loss": -0.0171, "step": 1131 }, { "clip_ratio/high_max": 0.002129919535946101, "clip_ratio/high_mean": 0.0008780746975389775, "clip_ratio/low_mean": 0.0005548698418351705, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014329445220937487, "epoch": 11.709037900874636, "grad_norm": 0.21398289501667023, "learning_rate": 7.5e-07, "loss": -0.0594, "step": 1132 }, { "clip_ratio/high_max": 0.0021786202160001267, "clip_ratio/high_mean": 0.0007960783223097678, "clip_ratio/low_mean": 0.0006059781480871607, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014020564631209709, "epoch": 11.718367346938775, "grad_norm": 0.16029854118824005, "learning_rate": 7.5e-07, "loss": -0.0324, "step": 1133 }, { "clip_ratio/high_max": 0.002082652434182819, "clip_ratio/high_mean": 0.0008250893570220796, "clip_ratio/low_mean": 0.0005226961175139877, "clip_ratio/low_min": 1.0959144674416166e-05, "clip_ratio/region_mean": 0.001347785466350615, "epoch": 11.727696793002915, "grad_norm": 0.19318728148937225, "learning_rate": 7.5e-07, "loss": -0.0599, "step": 1134 }, { "clip_ratio/high_max": 0.002024445253482554, "clip_ratio/high_mean": 0.0007556880718766479, "clip_ratio/low_mean": 0.0005978069002594566, "clip_ratio/low_min": 1.2339585737208836e-05, "clip_ratio/region_mean": 0.001353494983050041, "epoch": 11.737026239067056, "grad_norm": 0.2214851677417755, "learning_rate": 7.5e-07, "loss": -0.0357, "step": 1135 }, { "clip_ratio/high_max": 0.0018617011191963684, "clip_ratio/high_mean": 0.0007085026900313096, "clip_ratio/low_mean": 0.0006433493422264291, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001351852057268843, "epoch": 11.746355685131196, "grad_norm": 0.1487954556941986, "learning_rate": 7.5e-07, "loss": -0.0322, "step": 1136 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0894252232142857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4047.0, "completions/mean_length": 869.5850219726562, "completions/mean_terminated_length": 552.7269287109375, "completions/min_length": 124.0, "completions/min_terminated_length": 124.0, "epoch": 11.755685131195335, "grad_norm": 0.19410692155361176, "learning_rate": 7.5e-07, "loss": -0.0151, "num_tokens": 664127207.0, "reward": 0.6599470376968384, "reward_std": 0.1488002985715866, "rewards/simpleverify_reward/mean": 0.6599469780921936, "rewards/simpleverify_reward/std": 0.473743200302124, "step": 1137 }, { "clip_ratio/high_max": 0.0016863597447809298, "clip_ratio/high_mean": 0.0006722467442159541, "clip_ratio/low_mean": 0.00039298857882386073, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010652353539626347, "epoch": 11.765014577259475, "grad_norm": 0.17840518057346344, "learning_rate": 7.5e-07, "loss": -0.0453, "step": 1138 }, { "clip_ratio/high_max": 0.001950167370523559, "clip_ratio/high_mean": 0.0007380962288152659, "clip_ratio/low_mean": 0.00045424401741911424, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011923402307729702, "epoch": 11.774344023323614, "grad_norm": 0.18040117621421814, "learning_rate": 7.5e-07, "loss": -0.0486, "step": 1139 }, { "clip_ratio/high_max": 0.0020252152753528208, "clip_ratio/high_mean": 0.0007141114128899062, "clip_ratio/low_mean": 0.0003341720939715742, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010482834950380493, "epoch": 11.783673469387756, "grad_norm": 0.1389598697423935, "learning_rate": 7.5e-07, "loss": -0.0259, "step": 1140 }, { "clip_ratio/high_max": 0.001964819461136358, "clip_ratio/high_mean": 0.0007982373390404973, "clip_ratio/low_mean": 0.0004521301252680132, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012503674370236695, "epoch": 11.793002915451895, "grad_norm": 0.16619440913200378, "learning_rate": 7.5e-07, "loss": -0.0677, "step": 1141 }, { "clip_ratio/high_max": 0.001746793666825397, "clip_ratio/high_mean": 0.0006918710150785046, "clip_ratio/low_mean": 0.00042588526775944047, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011177562482771464, "epoch": 11.802332361516035, "grad_norm": 0.2014625519514084, "learning_rate": 7.5e-07, "loss": -0.0376, "step": 1142 }, { "clip_ratio/high_max": 0.0019797368004219607, "clip_ratio/high_mean": 0.000706029142747866, "clip_ratio/low_mean": 0.0005072617229870957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012132908705098089, "epoch": 11.811661807580174, "grad_norm": 0.17571130394935608, "learning_rate": 7.5e-07, "loss": -0.0195, "step": 1143 }, { "clip_ratio/high_max": 0.0020572567227645777, "clip_ratio/high_mean": 0.0008341150132764596, "clip_ratio/low_mean": 0.00042075996407220373, "clip_ratio/low_min": 1.4188422028382774e-05, "clip_ratio/region_mean": 0.0012548749764391687, "epoch": 11.820991253644316, "grad_norm": 0.17354971170425415, "learning_rate": 7.5e-07, "loss": -0.0422, "step": 1144 }, { "clip_ratio/high_max": 0.002528385324694682, "clip_ratio/high_mean": 0.0008810985837044427, "clip_ratio/low_mean": 0.0004886544484179467, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013697530230274424, "epoch": 11.830320699708455, "grad_norm": 0.1567763388156891, "learning_rate": 7.5e-07, "loss": -0.0394, "step": 1145 }, { "clip_ratio/high_max": 0.0023812984363758005, "clip_ratio/high_mean": 0.0008362418866454391, "clip_ratio/low_mean": 0.00043708876637538197, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012733306248264853, "epoch": 11.839650145772595, "grad_norm": 0.17876887321472168, "learning_rate": 7.5e-07, "loss": -0.0636, "step": 1146 }, { "clip_ratio/high_max": 0.0019337853882461786, "clip_ratio/high_mean": 0.0007886689563747495, "clip_ratio/low_mean": 0.0004884965287601517, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012771654910466168, "epoch": 11.848979591836734, "grad_norm": 0.1807648241519928, "learning_rate": 7.5e-07, "loss": -0.0578, "step": 1147 }, { "clip_ratio/high_max": 0.002356649005378131, "clip_ratio/high_mean": 0.000864885694682016, "clip_ratio/low_mean": 0.00036967579490010394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012345614886726253, "epoch": 11.858309037900874, "grad_norm": 0.21007977426052094, "learning_rate": 7.5e-07, "loss": -0.071, "step": 1148 }, { "clip_ratio/high_max": 0.0022785137844039127, "clip_ratio/high_mean": 0.0008146409909386421, "clip_ratio/low_mean": 0.00046667332799188443, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012813142893719487, "epoch": 11.867638483965015, "grad_norm": 0.18104872107505798, "learning_rate": 7.5e-07, "loss": -0.0446, "step": 1149 }, { "clip_ratio/high_max": 0.002124821079632966, "clip_ratio/high_mean": 0.0008838387975629303, "clip_ratio/low_mean": 0.0005858548015567067, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001469693615945289, "epoch": 11.876967930029155, "grad_norm": 0.15410415828227997, "learning_rate": 7.5e-07, "loss": -0.0697, "step": 1150 }, { "clip_ratio/high_max": 0.002136672719643684, "clip_ratio/high_mean": 0.0007762603136143298, "clip_ratio/low_mean": 0.000607382095495268, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013836424004693981, "epoch": 11.886297376093294, "grad_norm": 0.18163882195949554, "learning_rate": 7.5e-07, "loss": -0.0237, "step": 1151 }, { "clip_ratio/high_max": 0.0022974629791860934, "clip_ratio/high_mean": 0.0008905998956834082, "clip_ratio/low_mean": 0.000517804037372116, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014084039357840084, "epoch": 11.895626822157434, "grad_norm": 0.16448825597763062, "learning_rate": 7.5e-07, "loss": -0.0636, "step": 1152 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0858677455357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 3945.0, "completions/mean_length": 855.3893432617188, "completions/mean_terminated_length": 550.9869995117188, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 12.00932944606414, "grad_norm": 0.17307980358600616, "learning_rate": 7.5e-07, "loss": -0.0356, "num_tokens": 672778268.0, "reward": 0.6703404188156128, "reward_std": 0.1390530914068222, "rewards/simpleverify_reward/mean": 0.6703404188156128, "rewards/simpleverify_reward/std": 0.4701058864593506, "step": 1153 }, { "clip_ratio/high_max": 0.0014037346518307459, "clip_ratio/high_mean": 0.0004891069265795522, "clip_ratio/low_mean": 0.0004906710792056401, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009797780257940758, "epoch": 12.018658892128279, "grad_norm": 0.5368072390556335, "learning_rate": 7.5e-07, "loss": 0.042, "step": 1154 }, { "clip_ratio/high_max": 0.0018475349570508115, "clip_ratio/high_mean": 0.0007572917966172099, "clip_ratio/low_mean": 0.0003695592629355815, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011268510534137022, "epoch": 12.02798833819242, "grad_norm": 0.1776459813117981, "learning_rate": 7.5e-07, "loss": -0.0643, "step": 1155 }, { "clip_ratio/high_max": 0.0018551987886894494, "clip_ratio/high_mean": 0.0007314707763725892, "clip_ratio/low_mean": 0.00041491778210911434, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011463885639386717, "epoch": 12.03731778425656, "grad_norm": 0.17147719860076904, "learning_rate": 7.5e-07, "loss": -0.046, "step": 1156 }, { "clip_ratio/high_max": 0.0017618165693420451, "clip_ratio/high_mean": 0.0006819518075644737, "clip_ratio/low_mean": 0.0005228095997154014, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012047614000039175, "epoch": 12.0466472303207, "grad_norm": 0.17292562127113342, "learning_rate": 7.5e-07, "loss": -0.0622, "step": 1157 }, { "clip_ratio/high_max": 0.0020668560173362494, "clip_ratio/high_mean": 0.0007258662044478115, "clip_ratio/low_mean": 0.00045099976705387235, "clip_ratio/low_min": 1.4723204003530554e-05, "clip_ratio/region_mean": 0.0011768659842346096, "epoch": 12.055976676384839, "grad_norm": 0.16417227685451508, "learning_rate": 7.5e-07, "loss": -0.0417, "step": 1158 }, { "clip_ratio/high_max": 0.001836895062297117, "clip_ratio/high_mean": 0.0007415525515170884, "clip_ratio/low_mean": 0.0005728839387302287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013144364493200555, "epoch": 12.06530612244898, "grad_norm": 0.18013626337051392, "learning_rate": 7.5e-07, "loss": -0.0397, "step": 1159 }, { "clip_ratio/high_max": 0.0019859493731928524, "clip_ratio/high_mean": 0.0007801404954079771, "clip_ratio/low_mean": 0.0005100930811750004, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012902335729449987, "epoch": 12.07463556851312, "grad_norm": 0.16221684217453003, "learning_rate": 7.5e-07, "loss": -0.0513, "step": 1160 }, { "clip_ratio/high_max": 0.0020459222978388425, "clip_ratio/high_mean": 0.0007647428074051277, "clip_ratio/low_mean": 0.00046966155264271947, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001234404346178053, "epoch": 12.08396501457726, "grad_norm": 0.14534121751785278, "learning_rate": 7.5e-07, "loss": -0.0554, "step": 1161 }, { "clip_ratio/high_max": 0.0021559680462814867, "clip_ratio/high_mean": 0.0008226830013882136, "clip_ratio/low_mean": 0.0005323771265466348, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001355060132482322, "epoch": 12.093294460641399, "grad_norm": 0.1781163364648819, "learning_rate": 7.5e-07, "loss": -0.0585, "step": 1162 }, { "clip_ratio/high_max": 0.0018475401193427388, "clip_ratio/high_mean": 0.0007527768439103966, "clip_ratio/low_mean": 0.0006546333756887179, "clip_ratio/low_min": 4.111842281417921e-05, "clip_ratio/region_mean": 0.0014074102000449784, "epoch": 12.102623906705539, "grad_norm": 0.2247941642999649, "learning_rate": 7.5e-07, "loss": -0.0222, "step": 1163 }, { "clip_ratio/high_max": 0.0019225335090595763, "clip_ratio/high_mean": 0.0007485229953090311, "clip_ratio/low_mean": 0.0005348342947399942, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012833572654926684, "epoch": 12.11195335276968, "grad_norm": 0.22848616540431976, "learning_rate": 7.5e-07, "loss": -0.0378, "step": 1164 }, { "clip_ratio/high_max": 0.0020217188175593037, "clip_ratio/high_mean": 0.000804999197498546, "clip_ratio/low_mean": 0.000500092382026196, "clip_ratio/low_min": 1.7081169062294066e-05, "clip_ratio/region_mean": 0.0013050915804342367, "epoch": 12.12128279883382, "grad_norm": 0.16545841097831726, "learning_rate": 7.5e-07, "loss": -0.0636, "step": 1165 }, { "clip_ratio/high_max": 0.002434285299386829, "clip_ratio/high_mean": 0.0008125418316922151, "clip_ratio/low_mean": 0.0006317857096291846, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001444327543140389, "epoch": 12.130612244897959, "grad_norm": 0.20334884524345398, "learning_rate": 7.5e-07, "loss": 0.0275, "step": 1166 }, { "clip_ratio/high_max": 0.0018385760195087641, "clip_ratio/high_mean": 0.0007655345143575687, "clip_ratio/low_mean": 0.0005546615961975476, "clip_ratio/low_min": 1.121881177823525e-05, "clip_ratio/region_mean": 0.0013201961119193584, "epoch": 12.139941690962099, "grad_norm": 0.16053354740142822, "learning_rate": 7.5e-07, "loss": -0.0327, "step": 1167 }, { "clip_ratio/high_max": 0.002055904951703269, "clip_ratio/high_mean": 0.0007586473893752554, "clip_ratio/low_mean": 0.0005465551703309757, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013052025533397682, "epoch": 12.14927113702624, "grad_norm": 0.1373051106929779, "learning_rate": 7.5e-07, "loss": -0.0481, "step": 1168 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0871233258928571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4031.0, "completions/mean_length": 857.4595336914062, "completions/mean_terminated_length": 548.37890625, "completions/min_length": 110.0, "completions/min_terminated_length": 110.0, "epoch": 12.15860058309038, "grad_norm": 0.192658931016922, "learning_rate": 7.5e-07, "loss": -0.031, "num_tokens": 681340175.0, "reward": 0.674386203289032, "reward_std": 0.1358058899641037, "rewards/simpleverify_reward/mean": 0.6743861436843872, "rewards/simpleverify_reward/std": 0.46862009167671204, "step": 1169 }, { "clip_ratio/high_max": 0.0015297786994779017, "clip_ratio/high_mean": 0.0005704831255570753, "clip_ratio/low_mean": 0.0003752564094838817, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009457395208301023, "epoch": 12.167930029154519, "grad_norm": 0.18344587087631226, "learning_rate": 7.5e-07, "loss": -0.0285, "step": 1170 }, { "clip_ratio/high_max": 0.0019089436282229144, "clip_ratio/high_mean": 0.0006565207941093831, "clip_ratio/low_mean": 0.00037615819292113883, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001032678977935575, "epoch": 12.177259475218658, "grad_norm": 0.15388405323028564, "learning_rate": 7.5e-07, "loss": -0.0253, "step": 1171 }, { "clip_ratio/high_max": 0.0018293895336682908, "clip_ratio/high_mean": 0.0007173905032686889, "clip_ratio/low_mean": 0.0003321910744489287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010495815913600381, "epoch": 12.186588921282798, "grad_norm": 0.1900005042552948, "learning_rate": 7.5e-07, "loss": -0.0686, "step": 1172 }, { "clip_ratio/high_max": 0.001996609593334142, "clip_ratio/high_mean": 0.0007032706757854612, "clip_ratio/low_mean": 0.0003235724138903606, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010268431069562212, "epoch": 12.19591836734694, "grad_norm": 0.1474086195230484, "learning_rate": 7.5e-07, "loss": -0.0654, "step": 1173 }, { "clip_ratio/high_max": 0.002009912368521327, "clip_ratio/high_mean": 0.0007405108181046671, "clip_ratio/low_mean": 0.00043071653726656223, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001171227344457293, "epoch": 12.205247813411079, "grad_norm": 0.19040659070014954, "learning_rate": 7.5e-07, "loss": -0.0462, "step": 1174 }, { "clip_ratio/high_max": 0.001993093468627194, "clip_ratio/high_mean": 0.0007131287966331001, "clip_ratio/low_mean": 0.0004089067238055577, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011220355081604794, "epoch": 12.214577259475218, "grad_norm": 0.2700249254703522, "learning_rate": 7.5e-07, "loss": -0.0098, "step": 1175 }, { "clip_ratio/high_max": 0.0018312189786229283, "clip_ratio/high_mean": 0.0007790121926518623, "clip_ratio/low_mean": 0.0004379287947813282, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012169409747002646, "epoch": 12.223906705539358, "grad_norm": 0.1619889736175537, "learning_rate": 7.5e-07, "loss": -0.0542, "step": 1176 }, { "clip_ratio/high_max": 0.0017111765664594714, "clip_ratio/high_mean": 0.0006465139431384159, "clip_ratio/low_mean": 0.00045171231158747105, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010982262683683075, "epoch": 12.2332361516035, "grad_norm": 0.1990991085767746, "learning_rate": 7.5e-07, "loss": -0.0065, "step": 1177 }, { "clip_ratio/high_max": 0.0019338970378157683, "clip_ratio/high_mean": 0.0006799911170674022, "clip_ratio/low_mean": 0.00042313866924814647, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011031297872250434, "epoch": 12.242565597667639, "grad_norm": 0.19812889397144318, "learning_rate": 7.5e-07, "loss": -0.0262, "step": 1178 }, { "clip_ratio/high_max": 0.0021174364374019206, "clip_ratio/high_mean": 0.000810887848274433, "clip_ratio/low_mean": 0.0003644366452135728, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011753245089494158, "epoch": 12.251895043731778, "grad_norm": 0.14835980534553528, "learning_rate": 7.5e-07, "loss": -0.0839, "step": 1179 }, { "clip_ratio/high_max": 0.002051998300885316, "clip_ratio/high_mean": 0.0006957633013371378, "clip_ratio/low_mean": 0.00048726341742622026, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011830266812467016, "epoch": 12.261224489795918, "grad_norm": 0.17121875286102295, "learning_rate": 7.5e-07, "loss": -0.0149, "step": 1180 }, { "clip_ratio/high_max": 0.0017892907417262904, "clip_ratio/high_mean": 0.0007760531793792325, "clip_ratio/low_mean": 0.0004360889558938652, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012121421568735968, "epoch": 12.270553935860057, "grad_norm": 0.16578352451324463, "learning_rate": 7.5e-07, "loss": -0.0598, "step": 1181 }, { "clip_ratio/high_max": 0.0020028271173941903, "clip_ratio/high_mean": 0.0006776173759135418, "clip_ratio/low_mean": 0.0005421461137302686, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001219763496919768, "epoch": 12.279883381924199, "grad_norm": 0.1867089420557022, "learning_rate": 7.5e-07, "loss": -0.0033, "step": 1182 }, { "clip_ratio/high_max": 0.0019434749701758847, "clip_ratio/high_mean": 0.0007208031402115012, "clip_ratio/low_mean": 0.0006017506966600195, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00132255383869051, "epoch": 12.289212827988338, "grad_norm": 0.1685970276594162, "learning_rate": 7.5e-07, "loss": -0.0313, "step": 1183 }, { "clip_ratio/high_max": 0.00206588533546892, "clip_ratio/high_mean": 0.0007302391732082469, "clip_ratio/low_mean": 0.0006323698326013982, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013626090221805498, "epoch": 12.298542274052478, "grad_norm": 0.18310166895389557, "learning_rate": 7.5e-07, "loss": -0.042, "step": 1184 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09130859375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4049.0, "completions/mean_length": 893.92529296875, "completions/mean_terminated_length": 572.169189453125, "completions/min_length": 120.0, "completions/min_terminated_length": 120.0, "epoch": 12.307871720116617, "grad_norm": 0.18940389156341553, "learning_rate": 7.5e-07, "loss": -0.0385, "num_tokens": 690216447.0, "reward": 0.6521345376968384, "reward_std": 0.14165355265140533, "rewards/simpleverify_reward/mean": 0.6521344780921936, "rewards/simpleverify_reward/std": 0.47630971670150757, "step": 1185 }, { "clip_ratio/high_max": 0.0016303337361023296, "clip_ratio/high_mean": 0.0006464045873144642, "clip_ratio/low_mean": 0.0003287284823727532, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009751330544531811, "epoch": 12.317201166180759, "grad_norm": 0.14974093437194824, "learning_rate": 7.5e-07, "loss": -0.0583, "step": 1186 }, { "clip_ratio/high_max": 0.0016606894932920113, "clip_ratio/high_mean": 0.0006649305760220159, "clip_ratio/low_mean": 0.0002710844710236415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000936015054321615, "epoch": 12.326530612244898, "grad_norm": 0.16832828521728516, "learning_rate": 7.5e-07, "loss": -0.0874, "step": 1187 }, { "clip_ratio/high_max": 0.0015194263505691197, "clip_ratio/high_mean": 0.0005513956616596261, "clip_ratio/low_mean": 0.0005233359797784942, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010747316337074153, "epoch": 12.335860058309038, "grad_norm": 0.1739034503698349, "learning_rate": 7.5e-07, "loss": -0.0171, "step": 1188 }, { "clip_ratio/high_max": 0.0015972418914316222, "clip_ratio/high_mean": 0.0006373650121531682, "clip_ratio/low_mean": 0.000394969212493379, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010323342285118997, "epoch": 12.345189504373177, "grad_norm": 0.20493245124816895, "learning_rate": 7.5e-07, "loss": -0.0637, "step": 1189 }, { "clip_ratio/high_max": 0.0016205358660954516, "clip_ratio/high_mean": 0.0005471071654028492, "clip_ratio/low_mean": 0.0004857892336076475, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010328963926440338, "epoch": 12.354518950437317, "grad_norm": 0.19785532355308533, "learning_rate": 7.5e-07, "loss": -0.0008, "step": 1190 }, { "clip_ratio/high_max": 0.0013677254937647376, "clip_ratio/high_mean": 0.0005668448966389406, "clip_ratio/low_mean": 0.0004757466513183317, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010425915315863676, "epoch": 12.363848396501458, "grad_norm": 0.15725794434547424, "learning_rate": 7.5e-07, "loss": -0.0091, "step": 1191 }, { "clip_ratio/high_max": 0.001813759528886294, "clip_ratio/high_mean": 0.0006557645601787954, "clip_ratio/low_mean": 0.0004076038676430471, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010633684396452736, "epoch": 12.373177842565598, "grad_norm": 0.1724264919757843, "learning_rate": 7.5e-07, "loss": -0.0321, "step": 1192 }, { "clip_ratio/high_max": 0.0017496588261565194, "clip_ratio/high_mean": 0.0006538465931953397, "clip_ratio/low_mean": 0.0005411958654804039, "clip_ratio/low_min": 3.966940403188346e-05, "clip_ratio/region_mean": 0.0011950424377573654, "epoch": 12.382507288629737, "grad_norm": 0.1733783483505249, "learning_rate": 7.5e-07, "loss": -0.0275, "step": 1193 }, { "clip_ratio/high_max": 0.0019805151714535896, "clip_ratio/high_mean": 0.0007409060854115523, "clip_ratio/low_mean": 0.00045878949140387704, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011996955981885549, "epoch": 12.391836734693877, "grad_norm": 0.2453775852918625, "learning_rate": 7.5e-07, "loss": -0.0396, "step": 1194 }, { "clip_ratio/high_max": 0.0019385516425245441, "clip_ratio/high_mean": 0.0006793177908548387, "clip_ratio/low_mean": 0.00047458478138651117, "clip_ratio/low_min": 2.2918959075468592e-05, "clip_ratio/region_mean": 0.0011539026017999277, "epoch": 12.401166180758018, "grad_norm": 0.15714959800243378, "learning_rate": 7.5e-07, "loss": -0.0307, "step": 1195 }, { "clip_ratio/high_max": 0.00178022989348392, "clip_ratio/high_mean": 0.0006777688558941009, "clip_ratio/low_mean": 0.000501575963767209, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011793448102253024, "epoch": 12.410495626822158, "grad_norm": 0.17190992832183838, "learning_rate": 7.5e-07, "loss": -0.0496, "step": 1196 }, { "clip_ratio/high_max": 0.002219405916548567, "clip_ratio/high_mean": 0.0007756653376418399, "clip_ratio/low_mean": 0.0005321766370798287, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013078419397061225, "epoch": 12.419825072886297, "grad_norm": 0.16615161299705505, "learning_rate": 7.5e-07, "loss": -0.0414, "step": 1197 }, { "clip_ratio/high_max": 0.0017843523855844978, "clip_ratio/high_mean": 0.0007160395180108026, "clip_ratio/low_mean": 0.0005565141309489263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012725536143989302, "epoch": 12.429154518950437, "grad_norm": 0.15022048354148865, "learning_rate": 7.5e-07, "loss": -0.0174, "step": 1198 }, { "clip_ratio/high_max": 0.0020622038100555073, "clip_ratio/high_mean": 0.0007678472629777389, "clip_ratio/low_mean": 0.0005155264007044025, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012833736655011307, "epoch": 12.438483965014576, "grad_norm": 0.15177908539772034, "learning_rate": 7.5e-07, "loss": -0.0748, "step": 1199 }, { "clip_ratio/high_max": 0.001984735019505024, "clip_ratio/high_mean": 0.0008133135470416164, "clip_ratio/low_mean": 0.0005473154778883327, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013606290121970233, "epoch": 12.447813411078718, "grad_norm": 0.18198226392269135, "learning_rate": 7.5e-07, "loss": -0.0771, "step": 1200 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.08447265625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4066.0, "completions/mean_length": 857.092529296875, "completions/mean_terminated_length": 558.249267578125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 12.457142857142857, "grad_norm": 0.19336478412151337, "learning_rate": 7.5e-07, "loss": -0.0549, "num_tokens": 698985829.0, "reward": 0.6692941188812256, "reward_std": 0.14343145489692688, "rewards/simpleverify_reward/mean": 0.6692940592765808, "rewards/simpleverify_reward/std": 0.4704837501049042, "step": 1201 }, { "clip_ratio/high_max": 0.0015765035859658383, "clip_ratio/high_mean": 0.0005976414722681511, "clip_ratio/low_mean": 0.00041152579615300056, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010091672593262047, "epoch": 12.466472303206997, "grad_norm": 0.1865653395652771, "learning_rate": 7.5e-07, "loss": -0.0247, "step": 1202 }, { "clip_ratio/high_max": 0.0019367935310583562, "clip_ratio/high_mean": 0.0007453314101439901, "clip_ratio/low_mean": 0.0003493586657441483, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001094690047466429, "epoch": 12.475801749271136, "grad_norm": 0.18839654326438904, "learning_rate": 7.5e-07, "loss": -0.0612, "step": 1203 }, { "clip_ratio/high_max": 0.001771053717675386, "clip_ratio/high_mean": 0.0006908786126587074, "clip_ratio/low_mean": 0.0003706265413256915, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010615051614877302, "epoch": 12.485131195335278, "grad_norm": 0.19818352162837982, "learning_rate": 7.5e-07, "loss": -0.0511, "step": 1204 }, { "clip_ratio/high_max": 0.0015549865638604388, "clip_ratio/high_mean": 0.0006355703853841987, "clip_ratio/low_mean": 0.00047351619377877796, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001109086580981966, "epoch": 12.494460641399417, "grad_norm": 0.1816173642873764, "learning_rate": 7.5e-07, "loss": -0.0192, "step": 1205 }, { "clip_ratio/high_max": 0.0018846689999918453, "clip_ratio/high_mean": 0.0006925159977981821, "clip_ratio/low_mean": 0.0005291834568197373, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012216994764457922, "epoch": 12.503790087463557, "grad_norm": 0.34985125064849854, "learning_rate": 7.5e-07, "loss": -0.0257, "step": 1206 }, { "clip_ratio/high_max": 0.0017297472950303927, "clip_ratio/high_mean": 0.0006919989436937612, "clip_ratio/low_mean": 0.00040426549912808696, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010962644118990283, "epoch": 12.513119533527696, "grad_norm": 0.19423449039459229, "learning_rate": 7.5e-07, "loss": -0.0138, "step": 1207 }, { "clip_ratio/high_max": 0.0020787794201169163, "clip_ratio/high_mean": 0.0007982667539181421, "clip_ratio/low_mean": 0.00036204973821440944, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011603164930420462, "epoch": 12.522448979591836, "grad_norm": 0.16521035134792328, "learning_rate": 7.5e-07, "loss": -0.0788, "step": 1208 }, { "clip_ratio/high_max": 0.0018724928449955769, "clip_ratio/high_mean": 0.000640216831016005, "clip_ratio/low_mean": 0.0005429957436717814, "clip_ratio/low_min": 2.4752474928391166e-05, "clip_ratio/region_mean": 0.0011832125601358712, "epoch": 12.531778425655977, "grad_norm": 0.26329305768013, "learning_rate": 7.5e-07, "loss": -0.0115, "step": 1209 }, { "clip_ratio/high_max": 0.0021914084609306883, "clip_ratio/high_mean": 0.0008480054311803542, "clip_ratio/low_mean": 0.0004933247153076081, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013413301639957353, "epoch": 12.541107871720117, "grad_norm": 0.15834668278694153, "learning_rate": 7.5e-07, "loss": -0.0874, "step": 1210 }, { "clip_ratio/high_max": 0.002505872405890841, "clip_ratio/high_mean": 0.0009986477580241626, "clip_ratio/low_mean": 0.0004934554317514994, "clip_ratio/low_min": 1.14573786049732e-05, "clip_ratio/region_mean": 0.0014921032452548388, "epoch": 12.550437317784256, "grad_norm": 0.5171984434127808, "learning_rate": 7.5e-07, "loss": -0.0819, "step": 1211 }, { "clip_ratio/high_max": 0.0019852836721838685, "clip_ratio/high_mean": 0.0007104572105163243, "clip_ratio/low_mean": 0.0005536666421903647, "clip_ratio/low_min": 2.3381968276225962e-05, "clip_ratio/region_mean": 0.0012641238899959717, "epoch": 12.559766763848396, "grad_norm": 0.18751870095729828, "learning_rate": 7.5e-07, "loss": -0.0063, "step": 1212 }, { "clip_ratio/high_max": 0.0017798296539695002, "clip_ratio/high_mean": 0.0008144350358634256, "clip_ratio/low_mean": 0.0005683131107616646, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013827481197949965, "epoch": 12.569096209912537, "grad_norm": 0.18579109013080597, "learning_rate": 7.5e-07, "loss": -0.0485, "step": 1213 }, { "clip_ratio/high_max": 0.002244153816718608, "clip_ratio/high_mean": 0.0007662745210836874, "clip_ratio/low_mean": 0.0005152290132173221, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012815035443054512, "epoch": 12.578425655976677, "grad_norm": 0.1808105707168579, "learning_rate": 7.5e-07, "loss": -0.0262, "step": 1214 }, { "clip_ratio/high_max": 0.001956953201442957, "clip_ratio/high_mean": 0.0006798269714636263, "clip_ratio/low_mean": 0.0005786011661257362, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012584281321323942, "epoch": 12.587755102040816, "grad_norm": 0.1801355928182602, "learning_rate": 7.5e-07, "loss": 0.0028, "step": 1215 }, { "clip_ratio/high_max": 0.0020571070126607083, "clip_ratio/high_mean": 0.0008312225709232735, "clip_ratio/low_mean": 0.0006218460603122367, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014530686057696585, "epoch": 12.597084548104956, "grad_norm": 0.1904611438512802, "learning_rate": 7.5e-07, "loss": -0.0365, "step": 1216 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 4096.0, "completions/max_terminated_length": 4072.0, "completions/mean_length": 886.5894775390625, "completions/mean_terminated_length": 554.5814208984375, "completions/min_length": 103.0, "completions/min_terminated_length": 103.0, "epoch": 12.606413994169095, "grad_norm": 0.20043841004371643, "learning_rate": 7.5e-07, "loss": -0.0401, "num_tokens": 707639559.0, "reward": 0.6501116156578064, "reward_std": 0.14850300550460815, "rewards/simpleverify_reward/mean": 0.6501116156578064, "rewards/simpleverify_reward/std": 0.47695112228393555, "step": 1217 }, { "clip_ratio/high_max": 0.0019359571706445422, "clip_ratio/high_mean": 0.0005854117362105171, "clip_ratio/low_mean": 0.00043043430014222395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001015846033624257, "epoch": 12.615743440233237, "grad_norm": 0.17278847098350525, "learning_rate": 7.5e-07, "loss": -0.0194, "step": 1218 }, { "clip_ratio/high_max": 0.001556009410705883, "clip_ratio/high_mean": 0.0006227520361790084, "clip_ratio/low_mean": 0.0004772989354933088, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001100050958484644, "epoch": 12.625072886297376, "grad_norm": 0.18907994031906128, "learning_rate": 7.5e-07, "loss": -0.0147, "step": 1219 }, { "clip_ratio/high_max": 0.0018381371992290951, "clip_ratio/high_mean": 0.0007192294233391294, "clip_ratio/low_mean": 0.0003734820713816589, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010927114817604888, "epoch": 12.634402332361516, "grad_norm": 0.1751541644334793, "learning_rate": 7.5e-07, "loss": -0.0328, "step": 1220 }, { "clip_ratio/high_max": 0.0017246282259293366, "clip_ratio/high_mean": 0.0007396341679850593, "clip_ratio/low_mean": 0.0004287304946046788, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011683646807796322, "epoch": 12.643731778425655, "grad_norm": 0.1858539581298828, "learning_rate": 7.5e-07, "loss": -0.0268, "step": 1221 }, { "clip_ratio/high_max": 0.0019347066554473713, "clip_ratio/high_mean": 0.0007120239943105844, "clip_ratio/low_mean": 0.0004660113745558192, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011780353670474142, "epoch": 12.653061224489797, "grad_norm": 0.26119479537010193, "learning_rate": 7.5e-07, "loss": -0.0217, "step": 1222 }, { "clip_ratio/high_max": 0.0018272716079081874, "clip_ratio/high_mean": 0.0007308273088710848, "clip_ratio/low_mean": 0.000601758422817511, "clip_ratio/low_min": 2.5145845938823186e-05, "clip_ratio/region_mean": 0.0013325857416930376, "epoch": 12.662390670553936, "grad_norm": 0.21728216111660004, "learning_rate": 7.5e-07, "loss": -0.0099, "step": 1223 }, { "clip_ratio/high_max": 0.002101184865750838, "clip_ratio/high_mean": 0.0008292748370877234, "clip_ratio/low_mean": 0.00038363651037798263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012129113456467167, "epoch": 12.671720116618076, "grad_norm": 0.18949934840202332, "learning_rate": 7.5e-07, "loss": -0.0778, "step": 1224 }, { "clip_ratio/high_max": 0.0020283602752897423, "clip_ratio/high_mean": 0.000740334968213574, "clip_ratio/low_mean": 0.0006379783035299624, "clip_ratio/low_min": 2.647183464432601e-05, "clip_ratio/region_mean": 0.001378313267196063, "epoch": 12.681049562682215, "grad_norm": 0.19312873482704163, "learning_rate": 7.5e-07, "loss": -0.0262, "step": 1225 }, { "clip_ratio/high_max": 0.0021771085666841827, "clip_ratio/high_mean": 0.0009591148318577325, "clip_ratio/low_mean": 0.000534600074388436, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014937149389879778, "epoch": 12.690379008746355, "grad_norm": 0.18807104229927063, "learning_rate": 7.5e-07, "loss": -0.0834, "step": 1226 }, { "clip_ratio/high_max": 0.0020426092778507154, "clip_ratio/high_mean": 0.0007920515290607, "clip_ratio/low_mean": 0.00046984403297756216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012618955624930095, "epoch": 12.699708454810496, "grad_norm": 0.1736297756433487, "learning_rate": 7.5e-07, "loss": -0.0544, "step": 1227 }, { "clip_ratio/high_max": 0.002164315574191278, "clip_ratio/high_mean": 0.0008251062763520167, "clip_ratio/low_mean": 0.000541407423952478, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001366513672110159, "epoch": 12.709037900874636, "grad_norm": 0.1809934377670288, "learning_rate": 7.5e-07, "loss": -0.0767, "step": 1228 }, { "clip_ratio/high_max": 0.0020170828502159566, "clip_ratio/high_mean": 0.0008117012057482498, "clip_ratio/low_mean": 0.0006104480282829172, "clip_ratio/low_min": 1.581477772560902e-05, "clip_ratio/region_mean": 0.0014221492419892456, "epoch": 12.718367346938775, "grad_norm": 0.21064981818199158, "learning_rate": 7.5e-07, "loss": -0.0344, "step": 1229 }, { "clip_ratio/high_max": 0.001794552692444995, "clip_ratio/high_mean": 0.0007244291064125719, "clip_ratio/low_mean": 0.0005174479224479001, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001241877023858251, "epoch": 12.727696793002915, "grad_norm": 0.18681171536445618, "learning_rate": 7.5e-07, "loss": -0.0299, "step": 1230 }, { "clip_ratio/high_max": 0.0022599214571528137, "clip_ratio/high_mean": 0.0008835696353344247, "clip_ratio/low_mean": 0.0005457854754240543, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014293550884758588, "epoch": 12.737026239067056, "grad_norm": 0.17060433328151703, "learning_rate": 7.5e-07, "loss": -0.069, "step": 1231 }, { "clip_ratio/high_max": 0.0021145268256077543, "clip_ratio/high_mean": 0.000779950903961435, "clip_ratio/low_mean": 0.0006197566890477901, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013997075693623628, "epoch": 12.746355685131196, "grad_norm": 0.16552677750587463, "learning_rate": 7.5e-07, "loss": -0.0276, "step": 1232 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0939592633928571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4035.0, "completions/mean_length": 898.2282104492188, "completions/mean_terminated_length": 566.6091918945312, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 12.755685131195335, "grad_norm": 0.20300549268722534, "learning_rate": 7.5e-07, "loss": -0.022, "num_tokens": 716466262.0, "reward": 0.6536691188812256, "reward_std": 0.1443965882062912, "rewards/simpleverify_reward/mean": 0.6536690592765808, "rewards/simpleverify_reward/std": 0.4758167862892151, "step": 1233 }, { "clip_ratio/high_max": 0.0016479226724186447, "clip_ratio/high_mean": 0.000567954439247842, "clip_ratio/low_mean": 0.0004492869220484863, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010172413385589607, "epoch": 12.765014577259475, "grad_norm": 0.1667689085006714, "learning_rate": 7.5e-07, "loss": -0.0239, "step": 1234 }, { "clip_ratio/high_max": 0.0016356618416466517, "clip_ratio/high_mean": 0.0006584106540685752, "clip_ratio/low_mean": 0.0004219721172376012, "clip_ratio/low_min": 3.930328693968477e-05, "clip_ratio/region_mean": 0.0010803827626659768, "epoch": 12.774344023323614, "grad_norm": 0.17179609835147858, "learning_rate": 7.5e-07, "loss": -0.0482, "step": 1235 }, { "clip_ratio/high_max": 0.0020841074729105458, "clip_ratio/high_mean": 0.0007372817854047753, "clip_ratio/low_mean": 0.0003827756481769029, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011200574299436994, "epoch": 12.783673469387756, "grad_norm": 0.1552588939666748, "learning_rate": 7.5e-07, "loss": -0.0825, "step": 1236 }, { "clip_ratio/high_max": 0.0017355121053697076, "clip_ratio/high_mean": 0.0007223314914881485, "clip_ratio/low_mean": 0.0004572627829020348, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001179594277346041, "epoch": 12.793002915451895, "grad_norm": 0.2117546945810318, "learning_rate": 7.5e-07, "loss": -0.0209, "step": 1237 }, { "clip_ratio/high_max": 0.0018736967831500806, "clip_ratio/high_mean": 0.0007055858568492113, "clip_ratio/low_mean": 0.000488771718892167, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011943575627810787, "epoch": 12.802332361516035, "grad_norm": 0.1885368525981903, "learning_rate": 7.5e-07, "loss": 0.0008, "step": 1238 }, { "clip_ratio/high_max": 0.0019352788440301083, "clip_ratio/high_mean": 0.0007303805468836799, "clip_ratio/low_mean": 0.00047113366690609837, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012015142347081564, "epoch": 12.811661807580174, "grad_norm": 0.1773141324520111, "learning_rate": 7.5e-07, "loss": -0.0548, "step": 1239 }, { "clip_ratio/high_max": 0.0016539067510166205, "clip_ratio/high_mean": 0.0007002765341894701, "clip_ratio/low_mean": 0.0004540594318314106, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011543359523784602, "epoch": 12.820991253644316, "grad_norm": 0.1809576153755188, "learning_rate": 7.5e-07, "loss": -0.0598, "step": 1240 }, { "clip_ratio/high_max": 0.001964395614777459, "clip_ratio/high_mean": 0.0007405258993458119, "clip_ratio/low_mean": 0.0004988572272850433, "clip_ratio/low_min": 1.6009220416890457e-05, "clip_ratio/region_mean": 0.0012393831311783288, "epoch": 12.830320699708455, "grad_norm": 0.2402149736881256, "learning_rate": 7.5e-07, "loss": -0.0253, "step": 1241 }, { "clip_ratio/high_max": 0.001989732318179449, "clip_ratio/high_mean": 0.000833473550301278, "clip_ratio/low_mean": 0.0004345232782725361, "clip_ratio/low_min": 3.162955545121804e-05, "clip_ratio/region_mean": 0.001267996834940277, "epoch": 12.839650145772595, "grad_norm": 0.2072635442018509, "learning_rate": 7.5e-07, "loss": -0.0591, "step": 1242 }, { "clip_ratio/high_max": 0.001819495257223025, "clip_ratio/high_mean": 0.000724442030332284, "clip_ratio/low_mean": 0.0005539808016692405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012784228165401146, "epoch": 12.848979591836734, "grad_norm": 1.4372711181640625, "learning_rate": 7.5e-07, "loss": -0.0384, "step": 1243 }, { "clip_ratio/high_max": 0.0020328084501670673, "clip_ratio/high_mean": 0.0007520167255279375, "clip_ratio/low_mean": 0.000607677881816926, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013596946446341462, "epoch": 12.858309037900874, "grad_norm": 0.20521536469459534, "learning_rate": 7.5e-07, "loss": -0.0362, "step": 1244 }, { "clip_ratio/high_max": 0.001831037960073445, "clip_ratio/high_mean": 0.000715464577297098, "clip_ratio/low_mean": 0.0005850220290994912, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013004866232222412, "epoch": 12.867638483965015, "grad_norm": 0.17333026230335236, "learning_rate": 7.5e-07, "loss": -0.0253, "step": 1245 }, { "clip_ratio/high_max": 0.001843563914007973, "clip_ratio/high_mean": 0.0007237184581754263, "clip_ratio/low_mean": 0.00047791589145163016, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001201634353492409, "epoch": 12.876967930029155, "grad_norm": 0.17449648678302765, "learning_rate": 7.5e-07, "loss": -0.0555, "step": 1246 }, { "clip_ratio/high_max": 0.0016445114815724082, "clip_ratio/high_mean": 0.0006647114469160442, "clip_ratio/low_mean": 0.0005848589662491577, "clip_ratio/low_min": 1.652127866691444e-05, "clip_ratio/region_mean": 0.0012495704104367178, "epoch": 12.886297376093294, "grad_norm": 0.254708468914032, "learning_rate": 7.5e-07, "loss": -0.0057, "step": 1247 }, { "clip_ratio/high_max": 0.0022644672280875966, "clip_ratio/high_mean": 0.0009405586642969865, "clip_ratio/low_mean": 0.0006018312788000912, "clip_ratio/low_min": 9.879860954242758e-06, "clip_ratio/region_mean": 0.001542389905807795, "epoch": 12.895626822157434, "grad_norm": 0.15859606862068176, "learning_rate": 7.5e-07, "loss": -0.0411, "step": 1248 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0862862723214286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4057.0, "completions/mean_length": 868.2244262695312, "completions/mean_terminated_length": 563.410400390625, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 13.00932944606414, "grad_norm": 0.19432368874549866, "learning_rate": 7.5e-07, "loss": -0.0187, "num_tokens": 725272567.0, "reward": 0.676339328289032, "reward_std": 0.13747158646583557, "rewards/simpleverify_reward/mean": 0.6763392686843872, "rewards/simpleverify_reward/std": 0.4678885340690613, "step": 1249 }, { "clip_ratio/high_max": 0.0020968773897038773, "clip_ratio/high_mean": 0.0007379331909760367, "clip_ratio/low_mean": 0.0003499445988381922, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010878777975449339, "epoch": 13.018658892128279, "grad_norm": 0.19563952088356018, "learning_rate": 7.5e-07, "loss": -0.0805, "step": 1250 }, { "clip_ratio/high_max": 0.001767695586750051, "clip_ratio/high_mean": 0.0006178985022415873, "clip_ratio/low_mean": 0.00036675107799055695, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009846495631791186, "epoch": 13.02798833819242, "grad_norm": 0.1901664435863495, "learning_rate": 7.5e-07, "loss": -0.0068, "step": 1251 }, { "clip_ratio/high_max": 0.0018956902349600568, "clip_ratio/high_mean": 0.0007005979732639389, "clip_ratio/low_mean": 0.0004900649319097283, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011906629042641725, "epoch": 13.03731778425656, "grad_norm": 0.18280290067195892, "learning_rate": 7.5e-07, "loss": -0.0265, "step": 1252 }, { "clip_ratio/high_max": 0.0015589321847073734, "clip_ratio/high_mean": 0.0006493670716736233, "clip_ratio/low_mean": 0.00043396500564085727, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001083332062989939, "epoch": 13.0466472303207, "grad_norm": 0.14904798567295074, "learning_rate": 7.5e-07, "loss": -0.0707, "step": 1253 }, { "clip_ratio/high_max": 0.0019353746174601838, "clip_ratio/high_mean": 0.0006954467353352811, "clip_ratio/low_mean": 0.0004600845304594259, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011555312557902653, "epoch": 13.055976676384839, "grad_norm": 0.17444880306720734, "learning_rate": 7.5e-07, "loss": -0.0644, "step": 1254 }, { "clip_ratio/high_max": 0.001847042400186183, "clip_ratio/high_mean": 0.0006897366965858964, "clip_ratio/low_mean": 0.0005356742212825338, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012254109296918614, "epoch": 13.06530612244898, "grad_norm": 0.18318307399749756, "learning_rate": 7.5e-07, "loss": -0.0567, "step": 1255 }, { "clip_ratio/high_max": 0.0020856721021118574, "clip_ratio/high_mean": 0.0007710987792961532, "clip_ratio/low_mean": 0.00033789196186262416, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011089907529822085, "epoch": 13.07463556851312, "grad_norm": 0.18287393450737, "learning_rate": 7.5e-07, "loss": -0.0678, "step": 1256 }, { "clip_ratio/high_max": 0.001799275392841082, "clip_ratio/high_mean": 0.0006489257220891886, "clip_ratio/low_mean": 0.0005533674302569125, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00120229314052267, "epoch": 13.08396501457726, "grad_norm": 0.1569838970899582, "learning_rate": 7.5e-07, "loss": -0.0113, "step": 1257 }, { "clip_ratio/high_max": 0.0020963467468391173, "clip_ratio/high_mean": 0.0008017892341740662, "clip_ratio/low_mean": 0.0005160886612429749, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013178778935980517, "epoch": 13.093294460641399, "grad_norm": 0.1688612997531891, "learning_rate": 7.5e-07, "loss": -0.0843, "step": 1258 }, { "clip_ratio/high_max": 0.0020500008686212823, "clip_ratio/high_mean": 0.0007424704072036548, "clip_ratio/low_mean": 0.00036785042811970925, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011103208380518481, "epoch": 13.102623906705539, "grad_norm": 0.16254207491874695, "learning_rate": 7.5e-07, "loss": -0.0327, "step": 1259 }, { "clip_ratio/high_max": 0.0019706751882040408, "clip_ratio/high_mean": 0.0006216680358193116, "clip_ratio/low_mean": 0.00037702658664784394, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009986946279241238, "epoch": 13.11195335276968, "grad_norm": 0.1467360407114029, "learning_rate": 7.5e-07, "loss": -0.0719, "step": 1260 }, { "clip_ratio/high_max": 0.002092241651553195, "clip_ratio/high_mean": 0.0008275066520582186, "clip_ratio/low_mean": 0.00048802474384501693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013155313827155624, "epoch": 13.12128279883382, "grad_norm": 0.16962483525276184, "learning_rate": 7.5e-07, "loss": -0.0566, "step": 1261 }, { "clip_ratio/high_max": 0.0016492449103679974, "clip_ratio/high_mean": 0.0006417725635401439, "clip_ratio/low_mean": 0.0006810330469306791, "clip_ratio/low_min": 3.564638609532267e-05, "clip_ratio/region_mean": 0.0013228056341176853, "epoch": 13.130612244897959, "grad_norm": 0.18790215253829956, "learning_rate": 7.5e-07, "loss": -0.0038, "step": 1262 }, { "clip_ratio/high_max": 0.0021655849632225, "clip_ratio/high_mean": 0.0008160664256138261, "clip_ratio/low_mean": 0.0005155695471330546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013316359400050715, "epoch": 13.139941690962099, "grad_norm": 1.470483422279358, "learning_rate": 7.5e-07, "loss": -0.0363, "step": 1263 }, { "clip_ratio/high_max": 0.0015933838731143624, "clip_ratio/high_mean": 0.0005729485274059698, "clip_ratio/low_mean": 0.0005698601280528237, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011428086836531293, "epoch": 13.14927113702624, "grad_norm": 0.16250194609165192, "learning_rate": 7.5e-07, "loss": -0.0245, "step": 1264 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0846819196428571, "completions/max_length": 4096.0, "completions/max_terminated_length": 4059.0, "completions/mean_length": 851.7593383789062, "completions/mean_terminated_length": 551.6138305664062, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 13.15860058309038, "grad_norm": 0.1863900125026703, "learning_rate": 7.5e-07, "loss": -0.0287, "num_tokens": 733974516.0, "reward": 0.6795480251312256, "reward_std": 0.13633938133716583, "rewards/simpleverify_reward/mean": 0.6795479655265808, "rewards/simpleverify_reward/std": 0.466666579246521, "step": 1265 }, { "clip_ratio/high_max": 0.0017317329366051126, "clip_ratio/high_mean": 0.0006725897337673814, "clip_ratio/low_mean": 0.0004331703307798307, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001105760038626613, "epoch": 13.167930029154519, "grad_norm": 0.17453962564468384, "learning_rate": 7.5e-07, "loss": -0.0465, "step": 1266 }, { "clip_ratio/high_max": 0.0017340462072752416, "clip_ratio/high_mean": 0.0006397457309503807, "clip_ratio/low_mean": 0.0004244869022613784, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010642326578818029, "epoch": 13.177259475218658, "grad_norm": 0.1887424737215042, "learning_rate": 7.5e-07, "loss": -0.0199, "step": 1267 }, { "clip_ratio/high_max": 0.002033641118032392, "clip_ratio/high_mean": 0.0007519067366956733, "clip_ratio/low_mean": 0.0003684516057091969, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001120358327170834, "epoch": 13.186588921282798, "grad_norm": 0.22439411282539368, "learning_rate": 7.5e-07, "loss": -0.0677, "step": 1268 }, { "clip_ratio/high_max": 0.0017390771681675687, "clip_ratio/high_mean": 0.0007008309894445119, "clip_ratio/low_mean": 0.00044784032036204735, "clip_ratio/low_min": 1.9574068574002013e-05, "clip_ratio/region_mean": 0.001148671324699535, "epoch": 13.19591836734694, "grad_norm": 0.18118374049663544, "learning_rate": 7.5e-07, "loss": -0.0242, "step": 1269 }, { "clip_ratio/high_max": 0.001927893426909577, "clip_ratio/high_mean": 0.0007302688100025989, "clip_ratio/low_mean": 0.0004892993820249103, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012195682065794244, "epoch": 13.205247813411079, "grad_norm": 0.16624538600444794, "learning_rate": 7.5e-07, "loss": -0.0321, "step": 1270 }, { "clip_ratio/high_max": 0.0018390711193205789, "clip_ratio/high_mean": 0.0007192684624897083, "clip_ratio/low_mean": 0.00043331865072104847, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001152587115939241, "epoch": 13.214577259475218, "grad_norm": 0.1884564459323883, "learning_rate": 7.5e-07, "loss": -0.0617, "step": 1271 }, { "clip_ratio/high_max": 0.0016242224774032366, "clip_ratio/high_mean": 0.0006163869547890499, "clip_ratio/low_mean": 0.0005128155526108458, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011292024828435387, "epoch": 13.223906705539358, "grad_norm": 0.17025797069072723, "learning_rate": 7.5e-07, "loss": -0.0189, "step": 1272 }, { "clip_ratio/high_max": 0.0019609730843512807, "clip_ratio/high_mean": 0.0006515362874779385, "clip_ratio/low_mean": 0.0005180250691410038, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011695613611664157, "epoch": 13.2332361516035, "grad_norm": 0.3188588619232178, "learning_rate": 7.5e-07, "loss": -0.0247, "step": 1273 }, { "clip_ratio/high_max": 0.0019635434800875373, "clip_ratio/high_mean": 0.0007646184494660702, "clip_ratio/low_mean": 0.0004587784983414167, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012233969464432448, "epoch": 13.242565597667639, "grad_norm": 0.20746663212776184, "learning_rate": 7.5e-07, "loss": -0.0864, "step": 1274 }, { "clip_ratio/high_max": 0.00199284255359089, "clip_ratio/high_mean": 0.0008712283943168586, "clip_ratio/low_mean": 0.0005080697674202383, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013792981335427612, "epoch": 13.251895043731778, "grad_norm": 0.16058406233787537, "learning_rate": 7.5e-07, "loss": -0.0487, "step": 1275 }, { "clip_ratio/high_max": 0.001736634578264784, "clip_ratio/high_mean": 0.0006860068442620104, "clip_ratio/low_mean": 0.0007065870086080395, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013925938292231876, "epoch": 13.261224489795918, "grad_norm": 0.15562570095062256, "learning_rate": 7.5e-07, "loss": 0.0065, "step": 1276 }, { "clip_ratio/high_max": 0.0018053532876365352, "clip_ratio/high_mean": 0.0006683867650281172, "clip_ratio/low_mean": 0.0006037163529981626, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012721031198452692, "epoch": 13.270553935860057, "grad_norm": 0.17020294070243835, "learning_rate": 7.5e-07, "loss": -0.0087, "step": 1277 }, { "clip_ratio/high_max": 0.0019173251785105094, "clip_ratio/high_mean": 0.0007917857728898525, "clip_ratio/low_mean": 0.0005636549763039511, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013554407451010775, "epoch": 13.279883381924199, "grad_norm": 0.2506265640258789, "learning_rate": 7.5e-07, "loss": -0.0505, "step": 1278 }, { "clip_ratio/high_max": 0.002233525250630919, "clip_ratio/high_mean": 0.0008588757882534992, "clip_ratio/low_mean": 0.0005241203467676314, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013829961426381487, "epoch": 13.289212827988338, "grad_norm": 0.31887415051460266, "learning_rate": 7.5e-07, "loss": -0.0672, "step": 1279 }, { "clip_ratio/high_max": 0.0018067589480779134, "clip_ratio/high_mean": 0.0007324615980905946, "clip_ratio/low_mean": 0.0006324730875348905, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013649346656166017, "epoch": 13.298542274052478, "grad_norm": 0.16589130461215973, "learning_rate": 7.5e-07, "loss": -0.0201, "step": 1280 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0834263392857143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 848.4790649414062, "completions/mean_terminated_length": 552.890380859375, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 13.307871720116617, "grad_norm": 0.1918533444404602, "learning_rate": 7.5e-07, "loss": -0.0369, "num_tokens": 742660631.0, "reward": 0.6740373969078064, "reward_std": 0.1384187638759613, "rewards/simpleverify_reward/mean": 0.6740373969078064, "rewards/simpleverify_reward/std": 0.4687497317790985, "step": 1281 }, { "clip_ratio/high_max": 0.0017973951398744248, "clip_ratio/high_mean": 0.0007879517215769738, "clip_ratio/low_mean": 0.0003296279810456326, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011175797044415958, "epoch": 13.317201166180759, "grad_norm": 0.17775221168994904, "learning_rate": 7.5e-07, "loss": -0.0518, "step": 1282 }, { "clip_ratio/high_max": 0.0016641623915347736, "clip_ratio/high_mean": 0.0005899637380935019, "clip_ratio/low_mean": 0.00041191904529114254, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010018827560998034, "epoch": 13.326530612244898, "grad_norm": 0.145823672413826, "learning_rate": 7.5e-07, "loss": 0.0105, "step": 1283 }, { "clip_ratio/high_max": 0.0016269651241600513, "clip_ratio/high_mean": 0.0005755328147643013, "clip_ratio/low_mean": 0.0003599251945161086, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009354580361105036, "epoch": 13.335860058309038, "grad_norm": 0.17365247011184692, "learning_rate": 7.5e-07, "loss": -0.0307, "step": 1284 }, { "clip_ratio/high_max": 0.0018625222837727051, "clip_ratio/high_mean": 0.0007271476742971572, "clip_ratio/low_mean": 0.00037838074149476597, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011055284503527218, "epoch": 13.345189504373177, "grad_norm": 0.17756684124469757, "learning_rate": 7.5e-07, "loss": -0.0545, "step": 1285 }, { "clip_ratio/high_max": 0.0018063951611111406, "clip_ratio/high_mean": 0.0007341927848756313, "clip_ratio/low_mean": 0.0004555295545287663, "clip_ratio/low_min": 1.8606728190206923e-05, "clip_ratio/region_mean": 0.001189722304843599, "epoch": 13.354518950437317, "grad_norm": 0.1521482616662979, "learning_rate": 7.5e-07, "loss": -0.0501, "step": 1286 }, { "clip_ratio/high_max": 0.0017550016127643175, "clip_ratio/high_mean": 0.0006554140218213433, "clip_ratio/low_mean": 0.0004262691886651737, "clip_ratio/low_min": 1.2247697668499313e-05, "clip_ratio/region_mean": 0.0010816832036653068, "epoch": 13.363848396501458, "grad_norm": 0.27347517013549805, "learning_rate": 7.5e-07, "loss": -0.0392, "step": 1287 }, { "clip_ratio/high_max": 0.0020532298658508807, "clip_ratio/high_mean": 0.0008347374205186497, "clip_ratio/low_mean": 0.0003778105683522881, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012125479952374008, "epoch": 13.373177842565598, "grad_norm": 0.1734417825937271, "learning_rate": 7.5e-07, "loss": -0.0463, "step": 1288 }, { "clip_ratio/high_max": 0.00197953348106239, "clip_ratio/high_mean": 0.0007460720244125696, "clip_ratio/low_mean": 0.0006422964415833121, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013883684514439665, "epoch": 13.382507288629737, "grad_norm": 0.19656747579574585, "learning_rate": 7.5e-07, "loss": -0.0144, "step": 1289 }, { "clip_ratio/high_max": 0.0023582423782499973, "clip_ratio/high_mean": 0.0007664812219445594, "clip_ratio/low_mean": 0.0005112918244094544, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012777730407833587, "epoch": 13.391836734693877, "grad_norm": 0.21605472266674042, "learning_rate": 7.5e-07, "loss": -0.0321, "step": 1290 }, { "clip_ratio/high_max": 0.001888299095298862, "clip_ratio/high_mean": 0.0007388889625872253, "clip_ratio/low_mean": 0.00043126365653733956, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011701525945682079, "epoch": 13.401166180758018, "grad_norm": 0.23335719108581543, "learning_rate": 7.5e-07, "loss": -0.0463, "step": 1291 }, { "clip_ratio/high_max": 0.0019193130428902805, "clip_ratio/high_mean": 0.000757676058128709, "clip_ratio/low_mean": 0.0006277482552832225, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001385424311592942, "epoch": 13.410495626822158, "grad_norm": 0.18722014129161835, "learning_rate": 7.5e-07, "loss": -0.025, "step": 1292 }, { "clip_ratio/high_max": 0.0019404110789764673, "clip_ratio/high_mean": 0.0007670320574106881, "clip_ratio/low_mean": 0.0004746799877466401, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012417120451573282, "epoch": 13.419825072886297, "grad_norm": 0.1774846464395523, "learning_rate": 7.5e-07, "loss": -0.0517, "step": 1293 }, { "clip_ratio/high_max": 0.0022991551013546996, "clip_ratio/high_mean": 0.0008773473055043723, "clip_ratio/low_mean": 0.00058247198103345, "clip_ratio/low_min": 1.1472099686216097e-05, "clip_ratio/region_mean": 0.00145981929381378, "epoch": 13.429154518950437, "grad_norm": 0.19953960180282593, "learning_rate": 7.5e-07, "loss": -0.0321, "step": 1294 }, { "clip_ratio/high_max": 0.001989051372220274, "clip_ratio/high_mean": 0.000800597648776602, "clip_ratio/low_mean": 0.0005873118584531767, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013879095022275578, "epoch": 13.438483965014576, "grad_norm": 0.21038787066936493, "learning_rate": 7.5e-07, "loss": -0.0285, "step": 1295 }, { "clip_ratio/high_max": 0.002123140628100373, "clip_ratio/high_mean": 0.0008345042115251999, "clip_ratio/low_mean": 0.00046116188786982093, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001295666104851989, "epoch": 13.447813411078718, "grad_norm": 0.15971601009368896, "learning_rate": 7.5e-07, "loss": -0.0574, "step": 1296 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0889369419642857, "completions/max_length": 4096.0, "completions/max_terminated_length": 4024.0, "completions/mean_length": 861.0830688476562, "completions/mean_terminated_length": 545.2940673828125, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 13.457142857142857, "grad_norm": 0.17459258437156677, "learning_rate": 7.5e-07, "loss": -0.0321, "num_tokens": 751219861.0, "reward": 0.682686984539032, "reward_std": 0.1305491179227829, "rewards/simpleverify_reward/mean": 0.6826869249343872, "rewards/simpleverify_reward/std": 0.4654466509819031, "step": 1297 }, { "clip_ratio/high_max": 0.0015853089444135549, "clip_ratio/high_mean": 0.0005524582438738435, "clip_ratio/low_mean": 0.00028946413124231185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008419223649980268, "epoch": 13.466472303206997, "grad_norm": 0.1518564522266388, "learning_rate": 7.5e-07, "loss": -0.0298, "step": 1298 }, { "clip_ratio/high_max": 0.0019759548085858114, "clip_ratio/high_mean": 0.0007671266757824924, "clip_ratio/low_mean": 0.00031438174528375384, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001081508438801393, "epoch": 13.475801749271136, "grad_norm": 0.17527081072330475, "learning_rate": 7.5e-07, "loss": -0.0855, "step": 1299 }, { "clip_ratio/high_max": 0.001556838942633476, "clip_ratio/high_mean": 0.000632296156254597, "clip_ratio/low_mean": 0.00034249085365445353, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.000974787020822987, "epoch": 13.485131195335278, "grad_norm": 0.1754647195339203, "learning_rate": 7.5e-07, "loss": -0.0267, "step": 1300 }, { "clip_ratio/high_max": 0.001992287714529084, "clip_ratio/high_mean": 0.000737788309379539, "clip_ratio/low_mean": 0.0004135672179472749, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001151355536421761, "epoch": 13.494460641399417, "grad_norm": 1.881227970123291, "learning_rate": 7.5e-07, "loss": -0.0404, "step": 1301 }, { "clip_ratio/high_max": 0.001856618595411419, "clip_ratio/high_mean": 0.0006115169735494419, "clip_ratio/low_mean": 0.00035835467951983446, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009698716385173611, "epoch": 13.503790087463557, "grad_norm": 0.15449245274066925, "learning_rate": 7.5e-07, "loss": -0.0164, "step": 1302 }, { "clip_ratio/high_max": 0.0016646869680698728, "clip_ratio/high_mean": 0.0005953209274593974, "clip_ratio/low_mean": 0.0003454935349509469, "clip_ratio/low_min": 9.645062164054252e-06, "clip_ratio/region_mean": 0.0009408144705957966, "epoch": 13.513119533527696, "grad_norm": 0.1977192759513855, "learning_rate": 7.5e-07, "loss": -0.0333, "step": 1303 }, { "clip_ratio/high_max": 0.0019234943465562537, "clip_ratio/high_mean": 0.0006595021459361305, "clip_ratio/low_mean": 0.0003876682671943854, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010471704008523375, "epoch": 13.522448979591836, "grad_norm": 0.1947961002588272, "learning_rate": 7.5e-07, "loss": -0.0379, "step": 1304 }, { "clip_ratio/high_max": 0.002169461873563705, "clip_ratio/high_mean": 0.0007303112261070055, "clip_ratio/low_mean": 0.0004199971317575546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011503083424031502, "epoch": 13.531778425655977, "grad_norm": 0.20357443392276764, "learning_rate": 7.5e-07, "loss": -0.0434, "step": 1305 }, { "clip_ratio/high_max": 0.0019728850384126417, "clip_ratio/high_mean": 0.0007121723447198747, "clip_ratio/low_mean": 0.00036315307988843415, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010753254173323512, "epoch": 13.541107871720117, "grad_norm": 0.2074740082025528, "learning_rate": 7.5e-07, "loss": -0.0914, "step": 1306 }, { "clip_ratio/high_max": 0.0019112460104224738, "clip_ratio/high_mean": 0.00065833851476782, "clip_ratio/low_mean": 0.0005527644480025629, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012111029573134147, "epoch": 13.550437317784256, "grad_norm": 0.2329857498407364, "learning_rate": 7.5e-07, "loss": -0.0308, "step": 1307 }, { "clip_ratio/high_max": 0.002111664816766279, "clip_ratio/high_mean": 0.0008041499695536913, "clip_ratio/low_mean": 0.0005296123781590723, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013337623349798378, "epoch": 13.559766763848396, "grad_norm": 0.17049506306648254, "learning_rate": 7.5e-07, "loss": -0.0382, "step": 1308 }, { "clip_ratio/high_max": 0.001729444855300244, "clip_ratio/high_mean": 0.0006638789191129035, "clip_ratio/low_mean": 0.00045960068064232473, "clip_ratio/low_min": 2.486077937646769e-05, "clip_ratio/region_mean": 0.0011234796002099756, "epoch": 13.569096209912537, "grad_norm": 0.18023617565631866, "learning_rate": 7.5e-07, "loss": -0.0126, "step": 1309 }, { "clip_ratio/high_max": 0.0018486624721845146, "clip_ratio/high_mean": 0.0007747255094727734, "clip_ratio/low_mean": 0.0005587174482570845, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001333442960458342, "epoch": 13.578425655976677, "grad_norm": 0.7059165239334106, "learning_rate": 7.5e-07, "loss": -0.0607, "step": 1310 }, { "clip_ratio/high_max": 0.0018820370132743847, "clip_ratio/high_mean": 0.0007436110518028727, "clip_ratio/low_mean": 0.0005839293162352988, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001327540390775539, "epoch": 13.587755102040816, "grad_norm": 0.1896422952413559, "learning_rate": 7.5e-07, "loss": -0.0393, "step": 1311 }, { "clip_ratio/high_max": 0.00193466579366941, "clip_ratio/high_mean": 0.0006695911288261414, "clip_ratio/low_mean": 0.00048589584139335784, "clip_ratio/low_min": 1.8027112673735246e-05, "clip_ratio/region_mean": 0.0011554869706742465, "epoch": 13.597084548104956, "grad_norm": 0.16975539922714233, "learning_rate": 7.5e-07, "loss": -0.0467, "step": 1312 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0940987723214286, "completions/max_length": 4096.0, "completions/max_terminated_length": 4013.0, "completions/mean_length": 895.4205932617188, "completions/mean_terminated_length": 562.9664306640625, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 13.606413994169095, "grad_norm": 0.19092467427253723, "learning_rate": 7.5e-07, "loss": -0.0487, "num_tokens": 759956666.0, "reward": 0.6681780219078064, "reward_std": 0.14189332723617554, "rewards/simpleverify_reward/mean": 0.6681780219078064, "rewards/simpleverify_reward/std": 0.47088387608528137, "step": 1313 }, { "clip_ratio/high_max": 0.001985237278859131, "clip_ratio/high_mean": 0.0006908867744641611, "clip_ratio/low_mean": 0.0003737943688975065, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010646811388141941, "epoch": 13.615743440233237, "grad_norm": 0.16693232953548431, "learning_rate": 7.5e-07, "loss": -0.032, "step": 1314 }, { "clip_ratio/high_max": 0.0016722126201784704, "clip_ratio/high_mean": 0.000611864343227353, "clip_ratio/low_mean": 0.00042711852802312933, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001038982896716334, "epoch": 13.625072886297376, "grad_norm": 0.15888428688049316, "learning_rate": 7.5e-07, "loss": -0.012, "step": 1315 }, { "clip_ratio/high_max": 0.0017490009449829813, "clip_ratio/high_mean": 0.0006719621869706316, "clip_ratio/low_mean": 0.00038425016100518405, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001056212371622678, "epoch": 13.634402332361516, "grad_norm": 0.17130854725837708, "learning_rate": 7.5e-07, "loss": -0.1015, "step": 1316 }, { "clip_ratio/high_max": 0.001804918167181313, "clip_ratio/high_mean": 0.0006868801028758753, "clip_ratio/low_mean": 0.00043504054428922245, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011219206244277302, "epoch": 13.643731778425655, "grad_norm": 0.18567077815532684, "learning_rate": 7.5e-07, "loss": -0.0656, "step": 1317 }, { "clip_ratio/high_max": 0.0019860331740346737, "clip_ratio/high_mean": 0.0006830059392086696, "clip_ratio/low_mean": 0.0004450094038475072, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011280153339612298, "epoch": 13.653061224489797, "grad_norm": 0.22732974588871002, "learning_rate": 7.5e-07, "loss": -0.0486, "step": 1318 }, { "clip_ratio/high_max": 0.0017239078988495748, "clip_ratio/high_mean": 0.0006663089152425528, "clip_ratio/low_mean": 0.0005043447026764625, "clip_ratio/low_min": 2.5976725737564266e-05, "clip_ratio/region_mean": 0.001170653627923457, "epoch": 13.662390670553936, "grad_norm": 0.18800389766693115, "learning_rate": 7.5e-07, "loss": 0.0036, "step": 1319 }, { "clip_ratio/high_max": 0.001856379927630769, "clip_ratio/high_mean": 0.0006961313756619347, "clip_ratio/low_mean": 0.0004187441536487313, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011148755256726872, "epoch": 13.671720116618076, "grad_norm": 0.16457052528858185, "learning_rate": 7.5e-07, "loss": -0.0814, "step": 1320 }, { "clip_ratio/high_max": 0.0024243809821200557, "clip_ratio/high_mean": 0.0008695184478710871, "clip_ratio/low_mean": 0.0005033861064021039, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013729045531363226, "epoch": 13.681049562682215, "grad_norm": 0.21151591837406158, "learning_rate": 7.5e-07, "loss": -0.0451, "step": 1321 }, { "clip_ratio/high_max": 0.0018705024049268104, "clip_ratio/high_mean": 0.0006648998551099794, "clip_ratio/low_mean": 0.0006220925247362175, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001286992359382566, "epoch": 13.690379008746355, "grad_norm": 0.18356619775295258, "learning_rate": 7.5e-07, "loss": -0.0044, "step": 1322 }, { "clip_ratio/high_max": 0.0019470106417429633, "clip_ratio/high_mean": 0.0007311261178983841, "clip_ratio/low_mean": 0.0004510670837589714, "clip_ratio/low_min": 2.04381940420717e-05, "clip_ratio/region_mean": 0.001182193198474124, "epoch": 13.699708454810496, "grad_norm": 0.16942663490772247, "learning_rate": 7.5e-07, "loss": -0.0689, "step": 1323 }, { "clip_ratio/high_max": 0.00184669270311133, "clip_ratio/high_mean": 0.0007240466793518863, "clip_ratio/low_mean": 0.0006062217171347584, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013302684310474433, "epoch": 13.709037900874636, "grad_norm": 0.18498356640338898, "learning_rate": 7.5e-07, "loss": -0.0426, "step": 1324 }, { "clip_ratio/high_max": 0.0020595478199538775, "clip_ratio/high_mean": 0.0009012655245896894, "clip_ratio/low_mean": 0.0005980897572044341, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014993552831583656, "epoch": 13.718367346938775, "grad_norm": 0.16906242072582245, "learning_rate": 7.5e-07, "loss": -0.0833, "step": 1325 }, { "clip_ratio/high_max": 0.0018022143704001792, "clip_ratio/high_mean": 0.000765923919971101, "clip_ratio/low_mean": 0.00047545939833071316, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012413833028404042, "epoch": 13.727696793002915, "grad_norm": 0.16087569296360016, "learning_rate": 7.5e-07, "loss": -0.0418, "step": 1326 }, { "clip_ratio/high_max": 0.0017142096112365834, "clip_ratio/high_mean": 0.0006829991307313321, "clip_ratio/low_mean": 0.0005463084394250473, "clip_ratio/low_min": 5.524343032448087e-05, "clip_ratio/region_mean": 0.0012293075742491055, "epoch": 13.737026239067056, "grad_norm": 0.1830008327960968, "learning_rate": 7.5e-07, "loss": -0.0358, "step": 1327 }, { "clip_ratio/high_max": 0.002241310550743947, "clip_ratio/high_mean": 0.0008329143129230943, "clip_ratio/low_mean": 0.0006138598200777778, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014467741348198615, "epoch": 13.746355685131196, "grad_norm": 0.20498092472553253, "learning_rate": 7.5e-07, "loss": -0.0249, "step": 1328 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.1017020089285714, "completions/max_length": 4096.0, "completions/max_terminated_length": 3951.0, "completions/mean_length": 923.9270629882812, "completions/mean_terminated_length": 564.7965087890625, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 13.755685131195335, "grad_norm": 0.19439832866191864, "learning_rate": 7.5e-07, "loss": -0.0241, "num_tokens": 768669644.0, "reward": 0.6575056314468384, "reward_std": 0.1338600218296051, "rewards/simpleverify_reward/mean": 0.6575055718421936, "rewards/simpleverify_reward/std": 0.47456052899360657, "step": 1329 }, { "clip_ratio/high_max": 0.0016038700377976056, "clip_ratio/high_mean": 0.0005274439790809993, "clip_ratio/low_mean": 0.00034855324929594644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008759972079133149, "epoch": 13.765014577259475, "grad_norm": 0.17435666918754578, "learning_rate": 7.5e-07, "loss": -0.0269, "step": 1330 }, { "clip_ratio/high_max": 0.0015903971252555493, "clip_ratio/high_mean": 0.0006078363112465013, "clip_ratio/low_mean": 0.0003521859989632503, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009600223056622781, "epoch": 13.774344023323614, "grad_norm": 0.15023991465568542, "learning_rate": 7.5e-07, "loss": -0.0236, "step": 1331 }, { "clip_ratio/high_max": 0.002060203994915355, "clip_ratio/high_mean": 0.0007586102547065821, "clip_ratio/low_mean": 0.0002557701884597918, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001014380440210516, "epoch": 13.783673469387756, "grad_norm": 0.29132339358329773, "learning_rate": 7.5e-07, "loss": -0.0939, "step": 1332 }, { "clip_ratio/high_max": 0.0017381564903189428, "clip_ratio/high_mean": 0.0006271419692893687, "clip_ratio/low_mean": 0.00044639113775701844, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010735331125033554, "epoch": 13.793002915451895, "grad_norm": 0.262881338596344, "learning_rate": 7.5e-07, "loss": -0.0025, "step": 1333 }, { "clip_ratio/high_max": 0.0018592094602354337, "clip_ratio/high_mean": 0.0006510388921014965, "clip_ratio/low_mean": 0.0004852199217566522, "clip_ratio/low_min": 1.4387661394721363e-05, "clip_ratio/region_mean": 0.001136258815677138, "epoch": 13.802332361516035, "grad_norm": 0.16109876334667206, "learning_rate": 7.5e-07, "loss": -0.0118, "step": 1334 }, { "clip_ratio/high_max": 0.0016951346951827873, "clip_ratio/high_mean": 0.0006366981815517647, "clip_ratio/low_mean": 0.0003669081506814109, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010036063395091332, "epoch": 13.811661807580174, "grad_norm": 0.20438000559806824, "learning_rate": 7.5e-07, "loss": -0.0405, "step": 1335 }, { "clip_ratio/high_max": 0.002326330661162501, "clip_ratio/high_mean": 0.0007885355025791796, "clip_ratio/low_mean": 0.0005470876621984644, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013356231502257288, "epoch": 13.820991253644316, "grad_norm": 0.17652124166488647, "learning_rate": 7.5e-07, "loss": -0.062, "step": 1336 }, { "clip_ratio/high_max": 0.0015884122676652623, "clip_ratio/high_mean": 0.000631396521384886, "clip_ratio/low_mean": 0.0005199812731007114, "clip_ratio/low_min": 2.6354405235906597e-05, "clip_ratio/region_mean": 0.0011513778044900391, "epoch": 13.830320699708455, "grad_norm": 0.18030129373073578, "learning_rate": 7.5e-07, "loss": -0.0324, "step": 1337 }, { "clip_ratio/high_max": 0.0018571557448012754, "clip_ratio/high_mean": 0.000703402173712675, "clip_ratio/low_mean": 0.0005286058335514099, "clip_ratio/low_min": 1.4201317753759213e-05, "clip_ratio/region_mean": 0.0012320080058998428, "epoch": 13.839650145772595, "grad_norm": 0.20880082249641418, "learning_rate": 7.5e-07, "loss": -0.0458, "step": 1338 }, { "clip_ratio/high_max": 0.0019020389736397192, "clip_ratio/high_mean": 0.0007059027211653301, "clip_ratio/low_mean": 0.0004967543036400457, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012026570075249765, "epoch": 13.848979591836734, "grad_norm": 0.23823128640651703, "learning_rate": 7.5e-07, "loss": -0.0407, "step": 1339 }, { "clip_ratio/high_max": 0.0018331719111301936, "clip_ratio/high_mean": 0.0006720073888573097, "clip_ratio/low_mean": 0.0004346092141531699, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011066166080127005, "epoch": 13.858309037900874, "grad_norm": 0.2505812346935272, "learning_rate": 7.5e-07, "loss": -0.0668, "step": 1340 }, { "clip_ratio/high_max": 0.0019968442575191148, "clip_ratio/high_mean": 0.0008024024809856201, "clip_ratio/low_mean": 0.000540246381660836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013426488621917088, "epoch": 13.867638483965015, "grad_norm": 0.17435340583324432, "learning_rate": 7.5e-07, "loss": -0.0514, "step": 1341 }, { "clip_ratio/high_max": 0.0019261319102952257, "clip_ratio/high_mean": 0.0007593127265863586, "clip_ratio/low_mean": 0.0006086110779506271, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001367923785437597, "epoch": 13.876967930029155, "grad_norm": 0.21527433395385742, "learning_rate": 7.5e-07, "loss": -0.0176, "step": 1342 }, { "clip_ratio/high_max": 0.0020721740074804984, "clip_ratio/high_mean": 0.0008122234485199442, "clip_ratio/low_mean": 0.000555742302822182, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001367965705867391, "epoch": 13.886297376093294, "grad_norm": 0.16032186150550842, "learning_rate": 7.5e-07, "loss": -0.0604, "step": 1343 }, { "clip_ratio/high_max": 0.002342895830224734, "clip_ratio/high_mean": 0.0008598401818744605, "clip_ratio/low_mean": 0.0005848632895322226, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014447034627664834, "epoch": 13.895626822157434, "grad_norm": 0.14145778119564056, "learning_rate": 7.5e-07, "loss": -0.079, "step": 1344 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.09228515625, "completions/max_length": 4096.0, "completions/max_terminated_length": 3993.0, "completions/mean_length": 884.1862182617188, "completions/mean_terminated_length": 557.64892578125, "completions/min_length": 115.0, "completions/min_terminated_length": 115.0, "epoch": 14.00932944606414, "grad_norm": 0.18987612426280975, "learning_rate": 7.5e-07, "loss": -0.0274, "num_tokens": 777348665.0, "reward": 0.6829659938812256, "reward_std": 0.12668225169181824, "rewards/simpleverify_reward/mean": 0.6829659342765808, "rewards/simpleverify_reward/std": 0.46533700823783875, "step": 1345 }, { "clip_ratio/high_max": 0.001674151990300743, "clip_ratio/high_mean": 0.0006102338584241807, "clip_ratio/low_mean": 0.00041493375601930893, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001025167617626721, "epoch": 14.018658892128279, "grad_norm": 0.3001651167869568, "learning_rate": 7.5e-07, "loss": -0.0287, "step": 1346 }, { "clip_ratio/high_max": 0.0014051965627004392, "clip_ratio/high_mean": 0.0005408201459431439, "clip_ratio/low_mean": 0.0004183050950814504, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009591252201062161, "epoch": 14.02798833819242, "grad_norm": 0.14715218544006348, "learning_rate": 7.5e-07, "loss": -0.0481, "step": 1347 }, { "clip_ratio/high_max": 0.0017350773450743873, "clip_ratio/high_mean": 0.0007706096412221086, "clip_ratio/low_mean": 0.00029832370057647495, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010689333430491388, "epoch": 14.03731778425656, "grad_norm": 0.1503770500421524, "learning_rate": 7.5e-07, "loss": -0.0906, "step": 1348 }, { "clip_ratio/high_max": 0.001634985728742322, "clip_ratio/high_mean": 0.0005009576025258866, "clip_ratio/low_mean": 0.00034281502121302765, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008437726137344725, "epoch": 14.0466472303207, "grad_norm": 0.14491122961044312, "learning_rate": 7.5e-07, "loss": -0.0376, "step": 1349 }, { "clip_ratio/high_max": 0.0017629329395276727, "clip_ratio/high_mean": 0.0006312842219813319, "clip_ratio/low_mean": 0.0004013655889139045, "clip_ratio/low_min": 1.3947779734735377e-05, "clip_ratio/region_mean": 0.0010326498122594785, "epoch": 14.055976676384839, "grad_norm": 0.15769901871681213, "learning_rate": 7.5e-07, "loss": -0.0482, "step": 1350 }, { "clip_ratio/high_max": 0.0016776613992988132, "clip_ratio/high_mean": 0.0005972167346044444, "clip_ratio/low_mean": 0.0004377304699119122, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010349472067900933, "epoch": 14.06530612244898, "grad_norm": 0.17248989641666412, "learning_rate": 7.5e-07, "loss": -0.0116, "step": 1351 }, { "clip_ratio/high_max": 0.0015479312642128207, "clip_ratio/high_mean": 0.0005756824430136476, "clip_ratio/low_mean": 0.00040472090881849, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009804033525142586, "epoch": 14.07463556851312, "grad_norm": 0.21256296336650848, "learning_rate": 7.5e-07, "loss": -0.0295, "step": 1352 }, { "clip_ratio/high_max": 0.0018387409727438353, "clip_ratio/high_mean": 0.0006943028856767341, "clip_ratio/low_mean": 0.0004470827616387396, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011413856627768837, "epoch": 14.08396501457726, "grad_norm": 0.15833280980587006, "learning_rate": 7.5e-07, "loss": -0.0661, "step": 1353 }, { "clip_ratio/high_max": 0.0017497559674666263, "clip_ratio/high_mean": 0.0007299461140064523, "clip_ratio/low_mean": 0.00037767518688269774, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011076212940679397, "epoch": 14.093294460641399, "grad_norm": 0.1933690309524536, "learning_rate": 7.5e-07, "loss": -0.0395, "step": 1354 }, { "clip_ratio/high_max": 0.002178677517804317, "clip_ratio/high_mean": 0.0007679925729462411, "clip_ratio/low_mean": 0.00043364561770431465, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001201638220663881, "epoch": 14.102623906705539, "grad_norm": 0.17775413393974304, "learning_rate": 7.5e-07, "loss": -0.0825, "step": 1355 }, { "clip_ratio/high_max": 0.0016407185194111662, "clip_ratio/high_mean": 0.0006130551241767535, "clip_ratio/low_mean": 0.0005222136323936866, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011352687615726609, "epoch": 14.11195335276968, "grad_norm": 0.16980306804180145, "learning_rate": 7.5e-07, "loss": -0.0334, "step": 1356 }, { "clip_ratio/high_max": 0.001930172435095301, "clip_ratio/high_mean": 0.0007053784765957971, "clip_ratio/low_mean": 0.0004905713512926013, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001195949789689621, "epoch": 14.12128279883382, "grad_norm": 0.5095956325531006, "learning_rate": 7.5e-07, "loss": -0.0516, "step": 1357 }, { "clip_ratio/high_max": 0.0020639786635001656, "clip_ratio/high_mean": 0.0007810289134795312, "clip_ratio/low_mean": 0.000613227739904687, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013942566765763331, "epoch": 14.130612244897959, "grad_norm": 0.2106979787349701, "learning_rate": 7.5e-07, "loss": -0.0561, "step": 1358 }, { "clip_ratio/high_max": 0.0015255042853823397, "clip_ratio/high_mean": 0.0005550761873109877, "clip_ratio/low_mean": 0.0004500384075072361, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010051145945908502, "epoch": 14.139941690962099, "grad_norm": 0.1634514182806015, "learning_rate": 7.5e-07, "loss": -0.0268, "step": 1359 }, { "clip_ratio/high_max": 0.0017661486199358478, "clip_ratio/high_mean": 0.0006966276350794942, "clip_ratio/low_mean": 0.0005624995174002834, "clip_ratio/low_min": 2.3629489078302868e-05, "clip_ratio/region_mean": 0.001259127147932304, "epoch": 14.14927113702624, "grad_norm": 0.17249785363674164, "learning_rate": 7.5e-07, "loss": -0.0197, "step": 1360 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0947265625, "completions/max_length": 4096.0, "completions/max_terminated_length": 4086.0, "completions/mean_length": 894.1868896484375, "completions/mean_terminated_length": 559.153564453125, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 14.15860058309038, "grad_norm": 0.2146492451429367, "learning_rate": 7.5e-07, "loss": -0.0523, "num_tokens": 786019560.0, "reward": 0.676827609539032, "reward_std": 0.14048515260219574, "rewards/simpleverify_reward/mean": 0.6768275499343872, "rewards/simpleverify_reward/std": 0.46770426630973816, "step": 1361 }, { "clip_ratio/high_max": 0.0020541810044960584, "clip_ratio/high_mean": 0.0007884555034252116, "clip_ratio/low_mean": 0.0003298527904007642, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011183083151991013, "epoch": 14.167930029154519, "grad_norm": 0.15047024190425873, "learning_rate": 7.5e-07, "loss": -0.0708, "step": 1362 }, { "clip_ratio/high_max": 0.0021633166397805326, "clip_ratio/high_mean": 0.0007197243394330144, "clip_ratio/low_mean": 0.00042424906860105693, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011439734116720501, "epoch": 14.177259475218658, "grad_norm": 0.1777600347995758, "learning_rate": 7.5e-07, "loss": -0.031, "step": 1363 }, { "clip_ratio/high_max": 0.0017545492119097617, "clip_ratio/high_mean": 0.0006145703482616227, "clip_ratio/low_mean": 0.00046181688594515435, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010763872232928406, "epoch": 14.186588921282798, "grad_norm": 0.16774259507656097, "learning_rate": 7.5e-07, "loss": -0.0331, "step": 1364 }, { "clip_ratio/high_max": 0.0017060978243534919, "clip_ratio/high_mean": 0.0007153921487770276, "clip_ratio/low_mean": 0.00040204540778177034, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011174375413247617, "epoch": 14.19591836734694, "grad_norm": 0.17582902312278748, "learning_rate": 7.5e-07, "loss": -0.078, "step": 1365 }, { "clip_ratio/high_max": 0.0017512369922769722, "clip_ratio/high_mean": 0.0006767680570192169, "clip_ratio/low_mean": 0.000522718423326296, "clip_ratio/low_min": 1.441420681658201e-05, "clip_ratio/region_mean": 0.001199486490804702, "epoch": 14.205247813411079, "grad_norm": 0.17589323222637177, "learning_rate": 7.5e-07, "loss": -0.0127, "step": 1366 }, { "clip_ratio/high_max": 0.0021718795906053856, "clip_ratio/high_mean": 0.000827363855933072, "clip_ratio/low_mean": 0.0003924149646081787, "clip_ratio/low_min": 1.7085838408092968e-05, "clip_ratio/region_mean": 0.001219778809172567, "epoch": 14.214577259475218, "grad_norm": 0.23261195421218872, "learning_rate": 7.5e-07, "loss": -0.0716, "step": 1367 }, { "clip_ratio/high_max": 0.002146407190593891, "clip_ratio/high_mean": 0.0008496411956002703, "clip_ratio/low_mean": 0.00047765589033588185, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013272970736579737, "epoch": 14.223906705539358, "grad_norm": 0.20678889751434326, "learning_rate": 7.5e-07, "loss": -0.0787, "step": 1368 }, { "clip_ratio/high_max": 0.0016879725480976049, "clip_ratio/high_mean": 0.0006015101862431038, "clip_ratio/low_mean": 0.00040606500351714203, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010075751852127723, "epoch": 14.2332361516035, "grad_norm": 0.21287475526332855, "learning_rate": 7.5e-07, "loss": -0.0292, "step": 1369 }, { "clip_ratio/high_max": 0.0018595407491375227, "clip_ratio/high_mean": 0.0007437764870701358, "clip_ratio/low_mean": 0.0004925428579554136, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012363193527562544, "epoch": 14.242565597667639, "grad_norm": 0.1648014783859253, "learning_rate": 7.5e-07, "loss": -0.036, "step": 1370 }, { "clip_ratio/high_max": 0.0021021059328631964, "clip_ratio/high_mean": 0.0007307562536880141, "clip_ratio/low_mean": 0.0004795235381607199, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012102798064006492, "epoch": 14.251895043731778, "grad_norm": 0.32571157813072205, "learning_rate": 7.5e-07, "loss": 0.008, "step": 1371 }, { "clip_ratio/high_max": 0.0018790360263665207, "clip_ratio/high_mean": 0.0007549807796749519, "clip_ratio/low_mean": 0.0005026768430980155, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012576576264109462, "epoch": 14.261224489795918, "grad_norm": 0.18285787105560303, "learning_rate": 7.5e-07, "loss": -0.0538, "step": 1372 }, { "clip_ratio/high_max": 0.0020379203378979582, "clip_ratio/high_mean": 0.0007503591764361772, "clip_ratio/low_mean": 0.0005024864344704838, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012528456099971663, "epoch": 14.270553935860057, "grad_norm": 0.18977348506450653, "learning_rate": 7.5e-07, "loss": -0.0431, "step": 1373 }, { "clip_ratio/high_max": 0.0020523684106592555, "clip_ratio/high_mean": 0.0007805604800523724, "clip_ratio/low_mean": 0.0006135224903118797, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001394082970364252, "epoch": 14.279883381924199, "grad_norm": 0.3591042757034302, "learning_rate": 7.5e-07, "loss": -0.0205, "step": 1374 }, { "clip_ratio/high_max": 0.002346510060306173, "clip_ratio/high_mean": 0.0008886850155249704, "clip_ratio/low_mean": 0.0005843492390340543, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001473034262744477, "epoch": 14.289212827988338, "grad_norm": 0.21593287587165833, "learning_rate": 7.5e-07, "loss": -0.0289, "step": 1375 }, { "clip_ratio/high_max": 0.00217104186231154, "clip_ratio/high_mean": 0.0008381076750083594, "clip_ratio/low_mean": 0.0005982136071907007, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001436321261280682, "epoch": 14.298542274052478, "grad_norm": 930.5231323242188, "learning_rate": 7.5e-07, "loss": -0.0519, "step": 1376 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0897739955357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4050.0, "completions/mean_length": 880.0684814453125, "completions/mean_terminated_length": 562.8867797851562, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 14.307871720116617, "grad_norm": 0.2217532843351364, "learning_rate": 7.5e-07, "loss": -0.0236, "num_tokens": 794766205.0, "reward": 0.672921359539032, "reward_std": 0.14592835307121277, "rewards/simpleverify_reward/mean": 0.6729212999343872, "rewards/simpleverify_reward/std": 0.46916261315345764, "step": 1377 }, { "clip_ratio/high_max": 0.0023116807205951773, "clip_ratio/high_mean": 0.0007645242530998075, "clip_ratio/low_mean": 0.0003409716218811809, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011054958558815997, "epoch": 14.317201166180759, "grad_norm": 0.241689071059227, "learning_rate": 7.5e-07, "loss": -0.0273, "step": 1378 }, { "clip_ratio/high_max": 0.0017959979886654764, "clip_ratio/high_mean": 0.0006626152376156824, "clip_ratio/low_mean": 0.0005116683169035241, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011742835413315333, "epoch": 14.326530612244898, "grad_norm": 0.2947315573692322, "learning_rate": 7.5e-07, "loss": -0.0214, "step": 1379 }, { "clip_ratio/high_max": 0.0019112167501589283, "clip_ratio/high_mean": 0.0007662258267373545, "clip_ratio/low_mean": 0.00037506689250221825, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011412927124183625, "epoch": 14.335860058309038, "grad_norm": 0.18052607774734497, "learning_rate": 7.5e-07, "loss": -0.0551, "step": 1380 }, { "clip_ratio/high_max": 0.002140377833711682, "clip_ratio/high_mean": 0.0008422335122304503, "clip_ratio/low_mean": 0.0004450567530511762, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012872902625531424, "epoch": 14.345189504373177, "grad_norm": 0.23700152337551117, "learning_rate": 7.5e-07, "loss": -0.0492, "step": 1381 }, { "clip_ratio/high_max": 0.001723980538372416, "clip_ratio/high_mean": 0.0007537090241385158, "clip_ratio/low_mean": 0.0004601231803462724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001213832205394283, "epoch": 14.354518950437317, "grad_norm": 0.18219684064388275, "learning_rate": 7.5e-07, "loss": -0.0459, "step": 1382 }, { "clip_ratio/high_max": 0.002039447172137443, "clip_ratio/high_mean": 0.0008409306610701606, "clip_ratio/low_mean": 0.0005413690232671797, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013822996952512767, "epoch": 14.363848396501458, "grad_norm": 0.18124443292617798, "learning_rate": 7.5e-07, "loss": -0.0405, "step": 1383 }, { "clip_ratio/high_max": 0.001983257785468595, "clip_ratio/high_mean": 0.000855755123666313, "clip_ratio/low_mean": 0.0005906427172703843, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014463978513958864, "epoch": 14.373177842565598, "grad_norm": 0.22733160853385925, "learning_rate": 7.5e-07, "loss": -0.0325, "step": 1384 }, { "clip_ratio/high_max": 0.0020919531598337926, "clip_ratio/high_mean": 0.0007917576058389386, "clip_ratio/low_mean": 0.0005692898673714808, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013610474561573938, "epoch": 14.382507288629737, "grad_norm": 0.17730137705802917, "learning_rate": 7.5e-07, "loss": -0.0316, "step": 1385 }, { "clip_ratio/high_max": 0.002017391372646671, "clip_ratio/high_mean": 0.0008160430497810012, "clip_ratio/low_mean": 0.0006381847261991425, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014542277640430257, "epoch": 14.391836734693877, "grad_norm": 0.1944231241941452, "learning_rate": 7.5e-07, "loss": -0.0154, "step": 1386 }, { "clip_ratio/high_max": 0.00215818145807134, "clip_ratio/high_mean": 0.0008325747403432615, "clip_ratio/low_mean": 0.0006140407676866744, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014466155116679147, "epoch": 14.401166180758018, "grad_norm": 0.20359480381011963, "learning_rate": 7.5e-07, "loss": -0.0477, "step": 1387 }, { "clip_ratio/high_max": 0.0019061038874497171, "clip_ratio/high_mean": 0.0008481498207402183, "clip_ratio/low_mean": 0.0005575366831180872, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0014056865184102207, "epoch": 14.410495626822158, "grad_norm": 0.16301140189170837, "learning_rate": 7.5e-07, "loss": -0.0564, "step": 1388 }, { "clip_ratio/high_max": 0.0023588227049913257, "clip_ratio/high_mean": 0.0008376247897103895, "clip_ratio/low_mean": 0.0004720285801340651, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013096533693897072, "epoch": 14.419825072886297, "grad_norm": 0.17528951168060303, "learning_rate": 7.5e-07, "loss": -0.0523, "step": 1389 }, { "clip_ratio/high_max": 0.0022195680467120837, "clip_ratio/high_mean": 0.0008889923028618796, "clip_ratio/low_mean": 0.0006010633683217748, "clip_ratio/low_min": 4.777069989359006e-05, "clip_ratio/region_mean": 0.0014900556780048646, "epoch": 14.429154518950437, "grad_norm": 0.1730450987815857, "learning_rate": 7.5e-07, "loss": -0.0436, "step": 1390 }, { "clip_ratio/high_max": 0.0022863202248117886, "clip_ratio/high_mean": 0.0009273060459236149, "clip_ratio/low_mean": 0.0006735379515703244, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0016008440034056548, "epoch": 14.438483965014576, "grad_norm": 0.20614305138587952, "learning_rate": 7.5e-07, "loss": -0.0402, "step": 1391 }, { "clip_ratio/high_max": 0.0019313356024213135, "clip_ratio/high_mean": 0.0007580807396152522, "clip_ratio/low_mean": 0.00048135865790754906, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001239439381606644, "epoch": 14.447813411078718, "grad_norm": 0.20672832429409027, "learning_rate": 7.5e-07, "loss": -0.057, "step": 1392 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0966099330357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4046.0, "completions/mean_length": 897.9487915039062, "completions/mean_terminated_length": 555.9441528320312, "completions/min_length": 89.0, "completions/min_terminated_length": 89.0, "epoch": 14.457142857142857, "grad_norm": 0.19983899593353271, "learning_rate": 7.5e-07, "loss": -0.0625, "num_tokens": 803420966.0, "reward": 0.667550265789032, "reward_std": 0.1336434930562973, "rewards/simpleverify_reward/mean": 0.6675502061843872, "rewards/simpleverify_reward/std": 0.4711076319217682, "step": 1393 }, { "clip_ratio/high_max": 0.0017652402493695263, "clip_ratio/high_mean": 0.0006344765388348605, "clip_ratio/low_mean": 0.00037364529680417036, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010081218224513577, "epoch": 14.466472303206997, "grad_norm": 0.1549329161643982, "learning_rate": 7.5e-07, "loss": -0.0593, "step": 1394 }, { "clip_ratio/high_max": 0.0016723047665436752, "clip_ratio/high_mean": 0.0006053241613699356, "clip_ratio/low_mean": 0.0004523282368609216, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001057652410963783, "epoch": 14.475801749271136, "grad_norm": 0.19354736804962158, "learning_rate": 7.5e-07, "loss": -0.0328, "step": 1395 }, { "clip_ratio/high_max": 0.0019485070661175996, "clip_ratio/high_mean": 0.0006722996386088198, "clip_ratio/low_mean": 0.00047079842624953017, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011430980848672334, "epoch": 14.485131195335278, "grad_norm": 0.2427106499671936, "learning_rate": 7.5e-07, "loss": -0.0379, "step": 1396 }, { "clip_ratio/high_max": 0.0019849793789035175, "clip_ratio/high_mean": 0.0007302690610231366, "clip_ratio/low_mean": 0.0003863224324049952, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011165914511366282, "epoch": 14.494460641399417, "grad_norm": 0.20076104998588562, "learning_rate": 7.5e-07, "loss": -0.0486, "step": 1397 }, { "clip_ratio/high_max": 0.0019431257678661495, "clip_ratio/high_mean": 0.0007127591470634798, "clip_ratio/low_mean": 0.00031250260281012743, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001025261779432185, "epoch": 14.503790087463557, "grad_norm": 0.16329839825630188, "learning_rate": 7.5e-07, "loss": -0.0707, "step": 1398 }, { "clip_ratio/high_max": 0.0019132046363665722, "clip_ratio/high_mean": 0.0006918846202097484, "clip_ratio/low_mean": 0.0004486629482016724, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001140547581599094, "epoch": 14.513119533527696, "grad_norm": 0.16989648342132568, "learning_rate": 7.5e-07, "loss": -0.0423, "step": 1399 }, { "clip_ratio/high_max": 0.0019230084217269905, "clip_ratio/high_mean": 0.0006900575635881978, "clip_ratio/low_mean": 0.0005021291553930496, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011921867517230567, "epoch": 14.522448979591836, "grad_norm": 0.19262132048606873, "learning_rate": 7.5e-07, "loss": -0.0526, "step": 1400 }, { "clip_ratio/high_max": 0.0019120007782476023, "clip_ratio/high_mean": 0.0007662091729798703, "clip_ratio/low_mean": 0.00037515448912017746, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011413636457291432, "epoch": 14.531778425655977, "grad_norm": 0.16782663762569427, "learning_rate": 7.5e-07, "loss": -0.0804, "step": 1401 }, { "clip_ratio/high_max": 0.0021101335769344587, "clip_ratio/high_mean": 0.0006712515860272106, "clip_ratio/low_mean": 0.00046338566517079016, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011346372466505272, "epoch": 14.541107871720117, "grad_norm": 0.18336991965770721, "learning_rate": 7.5e-07, "loss": -0.0089, "step": 1402 }, { "clip_ratio/high_max": 0.0021167775921639986, "clip_ratio/high_mean": 0.0007896566439740127, "clip_ratio/low_mean": 0.0005817709507027757, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013714275955862831, "epoch": 14.550437317784256, "grad_norm": 0.20659904181957245, "learning_rate": 7.5e-07, "loss": -0.0376, "step": 1403 }, { "clip_ratio/high_max": 0.0017375813658873085, "clip_ratio/high_mean": 0.0006131485979494755, "clip_ratio/low_mean": 0.0006401533282769378, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012533019398688339, "epoch": 14.559766763848396, "grad_norm": 0.1927819401025772, "learning_rate": 7.5e-07, "loss": 0.0094, "step": 1404 }, { "clip_ratio/high_max": 0.0018001082680711988, "clip_ratio/high_mean": 0.0007043057103146566, "clip_ratio/low_mean": 0.0005708116577807232, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001275117348995991, "epoch": 14.569096209912537, "grad_norm": 0.2872471511363983, "learning_rate": 7.5e-07, "loss": 0.0119, "step": 1405 }, { "clip_ratio/high_max": 0.0026269914051226806, "clip_ratio/high_mean": 0.0008209603001887444, "clip_ratio/low_mean": 0.000497422296575678, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013183825722080655, "epoch": 14.578425655976677, "grad_norm": 0.1716318428516388, "learning_rate": 7.5e-07, "loss": -0.0186, "step": 1406 }, { "clip_ratio/high_max": 0.0017515521212772, "clip_ratio/high_mean": 0.0006616510581807233, "clip_ratio/low_mean": 0.0005828116682096152, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012444627027434763, "epoch": 14.587755102040816, "grad_norm": 0.22973866760730743, "learning_rate": 7.5e-07, "loss": -0.0195, "step": 1407 }, { "clip_ratio/high_max": 0.002022774704528274, "clip_ratio/high_mean": 0.0007278678785951342, "clip_ratio/low_mean": 0.0006061108879293897, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001333978787442902, "epoch": 14.597084548104956, "grad_norm": 0.16076034307479858, "learning_rate": 7.5e-07, "loss": -0.0326, "step": 1408 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0848214285714286, "completions/max_length": 4096.0, "completions/max_terminated_length": 3993.0, "completions/mean_length": 851.8195190429688, "completions/mean_terminated_length": 551.1393432617188, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 14.606413994169095, "grad_norm": 0.1825433373451233, "learning_rate": 7.5e-07, "loss": -0.0245, "num_tokens": 812103738.0, "reward": 0.6803850531578064, "reward_std": 0.13040997087955475, "rewards/simpleverify_reward/mean": 0.6803850531578064, "rewards/simpleverify_reward/std": 0.46634364128112793, "step": 1409 }, { "clip_ratio/high_max": 0.0017416533119103406, "clip_ratio/high_mean": 0.0006750147840648424, "clip_ratio/low_mean": 0.0002553876915953879, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009304024806624511, "epoch": 14.615743440233237, "grad_norm": 0.15922386944293976, "learning_rate": 7.5e-07, "loss": -0.0921, "step": 1410 }, { "clip_ratio/high_max": 0.0019035987425013445, "clip_ratio/high_mean": 0.0007165405368141364, "clip_ratio/low_mean": 0.00038754430215703906, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011040848185075447, "epoch": 14.625072886297376, "grad_norm": 0.18329031765460968, "learning_rate": 7.5e-07, "loss": -0.0551, "step": 1411 }, { "clip_ratio/high_max": 0.0019370724767213687, "clip_ratio/high_mean": 0.0007136657932278467, "clip_ratio/low_mean": 0.00030667583814647514, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001020341627736343, "epoch": 14.634402332361516, "grad_norm": 0.16920258104801178, "learning_rate": 7.5e-07, "loss": -0.0802, "step": 1412 }, { "clip_ratio/high_max": 0.0017947036303667119, "clip_ratio/high_mean": 0.0006329040129458008, "clip_ratio/low_mean": 0.0004699736518887221, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011028776771127013, "epoch": 14.643731778425655, "grad_norm": 0.1772395521402359, "learning_rate": 7.5e-07, "loss": -0.0039, "step": 1413 }, { "clip_ratio/high_max": 0.002031061700108694, "clip_ratio/high_mean": 0.0007555354295618599, "clip_ratio/low_mean": 0.00039916599735079217, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011547014364623465, "epoch": 14.653061224489797, "grad_norm": 0.18682743608951569, "learning_rate": 7.5e-07, "loss": -0.066, "step": 1414 }, { "clip_ratio/high_max": 0.0017368240987707395, "clip_ratio/high_mean": 0.000685608582898567, "clip_ratio/low_mean": 0.0003811830565609853, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010667916376405628, "epoch": 14.662390670553936, "grad_norm": 0.1655777096748352, "learning_rate": 7.5e-07, "loss": -0.0668, "step": 1415 }, { "clip_ratio/high_max": 0.002125597967278736, "clip_ratio/high_mean": 0.0006813019276705745, "clip_ratio/low_mean": 0.000404039713430393, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010853416151803685, "epoch": 14.671720116618076, "grad_norm": 0.1767096221446991, "learning_rate": 7.5e-07, "loss": -0.0126, "step": 1416 }, { "clip_ratio/high_max": 0.0018266278239025269, "clip_ratio/high_mean": 0.0007696641741858912, "clip_ratio/low_mean": 0.0004190120557723276, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011886762331414502, "epoch": 14.681049562682215, "grad_norm": 0.18526339530944824, "learning_rate": 7.5e-07, "loss": -0.0629, "step": 1417 }, { "clip_ratio/high_max": 0.0018164450229960494, "clip_ratio/high_mean": 0.0006530997598019894, "clip_ratio/low_mean": 0.0004579373144224519, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011110370942333248, "epoch": 14.690379008746355, "grad_norm": 0.1884993463754654, "learning_rate": 7.5e-07, "loss": -0.0246, "step": 1418 }, { "clip_ratio/high_max": 0.0014678296538477298, "clip_ratio/high_mean": 0.000639747950117453, "clip_ratio/low_mean": 0.0005697401666111546, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012094880985387135, "epoch": 14.699708454810496, "grad_norm": 0.19038771092891693, "learning_rate": 7.5e-07, "loss": -0.0012, "step": 1419 }, { "clip_ratio/high_max": 0.0017042665931512602, "clip_ratio/high_mean": 0.0006079942459109589, "clip_ratio/low_mean": 0.0004537880807902184, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0010617823245411273, "epoch": 14.709037900874636, "grad_norm": 0.20267589390277863, "learning_rate": 7.5e-07, "loss": -0.032, "step": 1420 }, { "clip_ratio/high_max": 0.0018279255164088681, "clip_ratio/high_mean": 0.0006100360751588596, "clip_ratio/low_mean": 0.0005325226429704344, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011425587217672728, "epoch": 14.718367346938775, "grad_norm": 0.15844888985157013, "learning_rate": 7.5e-07, "loss": -0.0063, "step": 1421 }, { "clip_ratio/high_max": 0.002394818999164272, "clip_ratio/high_mean": 0.0008154130282491678, "clip_ratio/low_mean": 0.0004407304163578374, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012561434159579221, "epoch": 14.727696793002915, "grad_norm": 0.19928781688213348, "learning_rate": 7.5e-07, "loss": -0.0347, "step": 1422 }, { "clip_ratio/high_max": 0.0017479966954851989, "clip_ratio/high_mean": 0.0006573673617822351, "clip_ratio/low_mean": 0.0004885009257122874, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011458682783995755, "epoch": 14.737026239067056, "grad_norm": 0.20410726964473724, "learning_rate": 7.5e-07, "loss": -0.0391, "step": 1423 }, { "clip_ratio/high_max": 0.002028343686106382, "clip_ratio/high_mean": 0.0008094610575426486, "clip_ratio/low_mean": 0.0004502309775489266, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012596920169016812, "epoch": 14.746355685131196, "grad_norm": 0.17934533953666687, "learning_rate": 7.5e-07, "loss": -0.0648, "step": 1424 }, { "clip_ratio/high_max": 0.0, "clip_ratio/high_mean": 0.0, "clip_ratio/low_mean": 0.0, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0, "completions/clipped_ratio": 0.0946568080357143, "completions/max_length": 4096.0, "completions/max_terminated_length": 4048.0, "completions/mean_length": 887.8990478515625, "completions/mean_terminated_length": 552.4807739257812, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 14.755685131195335, "grad_norm": 0.21925543248653412, "learning_rate": 7.5e-07, "loss": -0.0278, "num_tokens": 820737514.0, "reward": 0.6661551594734192, "reward_std": 0.13342009484767914, "rewards/simpleverify_reward/mean": 0.6661551594734192, "rewards/simpleverify_reward/std": 0.4716015160083771, "step": 1425 }, { "clip_ratio/high_max": 0.001327705514995614, "clip_ratio/high_mean": 0.0004890314694421249, "clip_ratio/low_mean": 0.000320846300382982, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0008098777470877394, "epoch": 14.765014577259475, "grad_norm": 0.17381136119365692, "learning_rate": 7.5e-07, "loss": -0.0028, "step": 1426 }, { "clip_ratio/high_max": 0.0016756187069404405, "clip_ratio/high_mean": 0.0005872925739822676, "clip_ratio/low_mean": 0.000391849095649377, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009791416887310334, "epoch": 14.774344023323614, "grad_norm": 0.22051213681697845, "learning_rate": 7.5e-07, "loss": -0.0323, "step": 1427 }, { "clip_ratio/high_max": 0.0015364818973466754, "clip_ratio/high_mean": 0.0005625032345051295, "clip_ratio/low_mean": 0.0004251370964993839, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009876403273665346, "epoch": 14.783673469387756, "grad_norm": 0.17549943923950195, "learning_rate": 7.5e-07, "loss": -0.0431, "step": 1428 }, { "clip_ratio/high_max": 0.0019474426117085386, "clip_ratio/high_mean": 0.0007063015400490258, "clip_ratio/low_mean": 0.0004650436785595957, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011713452404364944, "epoch": 14.793002915451895, "grad_norm": 0.20831650495529175, "learning_rate": 7.5e-07, "loss": -0.0179, "step": 1429 }, { "clip_ratio/high_max": 0.002082054656057153, "clip_ratio/high_mean": 0.0007468808007615735, "clip_ratio/low_mean": 0.00044152935060992604, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011884101459145313, "epoch": 14.802332361516035, "grad_norm": 0.1701059639453888, "learning_rate": 7.5e-07, "loss": -0.0722, "step": 1430 }, { "clip_ratio/high_max": 0.002035565961705288, "clip_ratio/high_mean": 0.0007903722907940391, "clip_ratio/low_mean": 0.00040949320259642263, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011998654808849096, "epoch": 14.811661807580174, "grad_norm": 0.17304779589176178, "learning_rate": 7.5e-07, "loss": -0.0696, "step": 1431 }, { "clip_ratio/high_max": 0.0019104457351204474, "clip_ratio/high_mean": 0.0006953089578018989, "clip_ratio/low_mean": 0.0004749302866002836, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011702392439474352, "epoch": 14.820991253644316, "grad_norm": 0.1708764135837555, "learning_rate": 7.5e-07, "loss": -0.044, "step": 1432 }, { "clip_ratio/high_max": 0.0014563554468622897, "clip_ratio/high_mean": 0.000520156955190032, "clip_ratio/low_mean": 0.0004699533837992931, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0009901103094307473, "epoch": 14.830320699708455, "grad_norm": 0.184386745095253, "learning_rate": 7.5e-07, "loss": 0.0246, "step": 1433 }, { "clip_ratio/high_max": 0.0017113665453507565, "clip_ratio/high_mean": 0.0007127610761017422, "clip_ratio/low_mean": 0.0004366292023405549, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0011493902748043183, "epoch": 14.839650145772595, "grad_norm": 0.23499636352062225, "learning_rate": 7.5e-07, "loss": -0.0452, "step": 1434 }, { "clip_ratio/high_max": 0.0017306373083556537, "clip_ratio/high_mean": 0.0007504851655539824, "clip_ratio/low_mean": 0.0004846562744660332, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012351414479780942, "epoch": 14.848979591836734, "grad_norm": 0.18666890263557434, "learning_rate": 7.5e-07, "loss": -0.0279, "step": 1435 }, { "clip_ratio/high_max": 0.0016593448381172493, "clip_ratio/high_mean": 0.0006145741554064443, "clip_ratio/low_mean": 0.0005099724317005894, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00112454657573835, "epoch": 14.858309037900874, "grad_norm": 0.19027793407440186, "learning_rate": 7.5e-07, "loss": -0.0444, "step": 1436 }, { "clip_ratio/high_max": 0.00207335071900161, "clip_ratio/high_mean": 0.000811629967756744, "clip_ratio/low_mean": 0.00039783432157491916, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.00120946426068258, "epoch": 14.867638483965015, "grad_norm": 28.190279006958008, "learning_rate": 7.5e-07, "loss": -0.0993, "step": 1437 }, { "clip_ratio/high_max": 0.001927626748511102, "clip_ratio/high_mean": 0.0006760867017874261, "clip_ratio/low_mean": 0.0007064537949190708, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0013825405258103274, "epoch": 14.876967930029155, "grad_norm": 0.178035706281662, "learning_rate": 7.5e-07, "loss": -0.0107, "step": 1438 }, { "clip_ratio/high_max": 0.0019074844167334959, "clip_ratio/high_mean": 0.0006659216533080325, "clip_ratio/low_mean": 0.0005447177090900368, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.0012106393478461541, "epoch": 14.886297376093294, "grad_norm": 0.19566580653190613, "learning_rate": 7.5e-07, "loss": -0.0394, "step": 1439 }, { "clip_ratio/high_max": 0.002160776381060714, "clip_ratio/high_mean": 0.0008536144760000752, "clip_ratio/low_mean": 0.0004986305425518367, "clip_ratio/low_min": 0.0, "clip_ratio/region_mean": 0.001352245031739585, "epoch": 14.895626822157434, "grad_norm": 0.19444924592971802, "learning_rate": 7.5e-07, "loss": -0.0744, "step": 1440 }, { "epoch": 14.895626822157434, "step": 1440, "total_flos": 0.0, "train_loss": -0.01819617560245206, "train_runtime": 89088.2174, "train_samples_per_second": 16.092, "train_steps_per_second": 0.018 } ], "logging_steps": 1, "max_steps": 1600, "num_input_tokens_seen": 820737514, "num_train_epochs": 15, "save_steps": 160, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }