{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9982507288629737,
  "eval_steps": 500,
  "global_step": 428,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2524.0,
      "completions/mean_length": 585.0535888671875,
      "completions/mean_terminated_length": 553.4234619140625,
      "completions/min_length": 22.0,
      "completions/min_terminated_length": 22.0,
      "epoch": 0.0023323615160349854,
      "grad_norm": 0.27251678705215454,
      "kl": 0.0,
      "learning_rate": 1e-06,
      "loss": 0.0245,
      "num_tokens": 154148.0,
      "reward": 0.455357164144516,
      "reward_std": 0.26333752274513245,
      "rewards/verify_math_reward/mean": 0.4553571343421936,
      "rewards/verify_math_reward/std": 0.4991183280944824,
      "step": 1
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2988.0,
      "completions/mean_length": 522.1116333007812,
      "completions/mean_terminated_length": 506.0852355957031,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 0.004664723032069971,
      "grad_norm": 0.38745930790901184,
      "kl": 0.0004086494445800781,
      "learning_rate": 1e-06,
      "loss": 0.0205,
      "num_tokens": 288997.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.33575403690338135,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.48023054003715515,
      "step": 2
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2408.0,
      "completions/mean_length": 611.8125,
      "completions/mean_terminated_length": 548.463623046875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.006997084548104956,
      "grad_norm": 0.2856990098953247,
      "kl": 0.000415802001953125,
      "learning_rate": 1e-06,
      "loss": 0.0109,
      "num_tokens": 450323.0,
      "reward": 0.4732142984867096,
      "reward_std": 0.2688922584056854,
      "rewards/verify_math_reward/mean": 0.4732142984867096,
      "rewards/verify_math_reward/std": 0.5004002451896667,
      "step": 3
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1632.0,
      "completions/mean_length": 570.8125,
      "completions/mean_terminated_length": 522.9592895507812,
      "completions/min_length": 79.0,
      "completions/min_terminated_length": 79.0,
      "epoch": 0.009329446064139942,
      "grad_norm": 0.27500009536743164,
      "kl": 0.0004596710205078125,
      "learning_rate": 1e-06,
      "loss": 0.0309,
      "num_tokens": 599417.0,
      "reward": 0.504464328289032,
      "reward_std": 0.24814742803573608,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5010998845100403,
      "step": 4
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2712.0,
      "completions/mean_length": 654.7589721679688,
      "completions/mean_terminated_length": 592.19091796875,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 0.011661807580174927,
      "grad_norm": 0.2343212068080902,
      "kl": 0.0005488395690917969,
      "learning_rate": 1e-06,
      "loss": 0.0526,
      "num_tokens": 766739.0,
      "reward": 0.4508928656578064,
      "reward_std": 0.21899083256721497,
      "rewards/verify_math_reward/mean": 0.4508928656578064,
      "rewards/verify_math_reward/std": 0.49869707226753235,
      "step": 5
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3986.0,
      "completions/mean_length": 557.53125,
      "completions/mean_terminated_length": 541.6636962890625,
      "completions/min_length": 37.0,
      "completions/min_terminated_length": 37.0,
      "epoch": 0.013994169096209912,
      "grad_norm": 0.2720922529697418,
      "kl": 0.0006399154663085938,
      "learning_rate": 1e-06,
      "loss": 0.0228,
      "num_tokens": 912194.0,
      "reward": 0.535714328289032,
      "reward_std": 0.23205281794071198,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 6
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1314.0,
      "completions/mean_length": 552.513427734375,
      "completions/mean_terminated_length": 536.6233520507812,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 0.0163265306122449,
      "grad_norm": 0.25227245688438416,
      "kl": 0.000701904296875,
      "learning_rate": 1e-06,
      "loss": 0.0249,
      "num_tokens": 1056189.0,
      "reward": 0.4107142984867096,
      "reward_std": 0.262125164270401,
      "rewards/verify_math_reward/mean": 0.4107142984867096,
      "rewards/verify_math_reward/std": 0.4930652976036072,
      "step": 7
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1745.0,
      "completions/mean_length": 593.9955444335938,
      "completions/mean_terminated_length": 546.45703125,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 0.018658892128279883,
      "grad_norm": 0.21280020475387573,
      "kl": 0.0007581710815429688,
      "learning_rate": 1e-06,
      "loss": 0.0164,
      "num_tokens": 1215044.0,
      "reward": 0.486607164144516,
      "reward_std": 0.18367049098014832,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 8
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1892.0,
      "completions/mean_length": 607.5491333007812,
      "completions/mean_terminated_length": 576.1216430664062,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 0.02099125364431487,
      "grad_norm": 0.2193998098373413,
      "kl": 0.0007028579711914062,
      "learning_rate": 1e-06,
      "loss": 0.0316,
      "num_tokens": 1375951.0,
      "reward": 0.4732142984867096,
      "reward_std": 0.20501871407032013,
      "rewards/verify_math_reward/mean": 0.4732142984867096,
      "rewards/verify_math_reward/std": 0.5004002451896667,
      "step": 9
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2161.0,
      "completions/mean_length": 672.5848388671875,
      "completions/mean_terminated_length": 610.3408813476562,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 0.023323615160349854,
      "grad_norm": 0.24384577572345734,
      "kl": 0.0007524490356445312,
      "learning_rate": 1e-06,
      "loss": 0.0274,
      "num_tokens": 1551898.0,
      "reward": 0.4330357313156128,
      "reward_std": 0.25236257910728455,
      "rewards/verify_math_reward/mean": 0.4330357015132904,
      "rewards/verify_math_reward/std": 0.49660524725914,
      "step": 10
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1710.0,
      "completions/max_terminated_length": 1710.0,
      "completions/mean_length": 464.33929443359375,
      "completions/mean_terminated_length": 464.33929443359375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 0.02565597667638484,
      "grad_norm": 0.27325257658958435,
      "kl": 0.0012264251708984375,
      "learning_rate": 1e-06,
      "loss": 0.0394,
      "num_tokens": 1674478.0,
      "reward": 0.7276785969734192,
      "reward_std": 0.20471924543380737,
      "rewards/verify_math_reward/mean": 0.7276785969734192,
      "rewards/verify_math_reward/std": 0.44615140557289124,
      "step": 11
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3055.0,
      "completions/mean_length": 607.9285888671875,
      "completions/mean_terminated_length": 511.92657470703125,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 0.027988338192419825,
      "grad_norm": 0.2339106947183609,
      "kl": 0.001262664794921875,
      "learning_rate": 1e-06,
      "loss": 0.0289,
      "num_tokens": 1840526.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.1704346090555191,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5010399222373962,
      "step": 12
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1666.0,
      "completions/mean_length": 591.8080444335938,
      "completions/mean_terminated_length": 544.2398681640625,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 0.030320699708454812,
      "grad_norm": 0.2372410148382187,
      "kl": 0.0013179779052734375,
      "learning_rate": 1e-06,
      "loss": 0.0248,
      "num_tokens": 1994643.0,
      "reward": 0.566964328289032,
      "reward_std": 0.17013515532016754,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49660524725914,
      "step": 13
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1583.0,
      "completions/mean_length": 559.125,
      "completions/mean_terminated_length": 543.2645874023438,
      "completions/min_length": 116.0,
      "completions/min_terminated_length": 116.0,
      "epoch": 0.0326530612244898,
      "grad_norm": 0.23604567348957062,
      "kl": 0.001369476318359375,
      "learning_rate": 1e-06,
      "loss": 0.0494,
      "num_tokens": 2138215.0,
      "reward": 0.535714328289032,
      "reward_std": 0.22064557671546936,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 14
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1934.0,
      "completions/max_terminated_length": 1934.0,
      "completions/mean_length": 506.263427734375,
      "completions/mean_terminated_length": 506.263427734375,
      "completions/min_length": 186.0,
      "completions/min_terminated_length": 186.0,
      "epoch": 0.03498542274052478,
      "grad_norm": 0.24074804782867432,
      "kl": 0.00183868408203125,
      "learning_rate": 1e-06,
      "loss": 0.0354,
      "num_tokens": 2273434.0,
      "reward": 0.598214328289032,
      "reward_std": 0.18922802805900574,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49135705828666687,
      "step": 15
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3522.0,
      "completions/mean_length": 573.4152221679688,
      "completions/mean_terminated_length": 509.3681640625,
      "completions/min_length": 112.0,
      "completions/min_terminated_length": 112.0,
      "epoch": 0.037317784256559766,
      "grad_norm": 0.27736690640449524,
      "kl": 0.0018978118896484375,
      "learning_rate": 1e-06,
      "loss": 0.0253,
      "num_tokens": 2419615.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.22229303419589996,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48947933316230774,
      "step": 16
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3601.0,
      "completions/mean_length": 633.9732666015625,
      "completions/mean_terminated_length": 586.9774169921875,
      "completions/min_length": 222.0,
      "completions/min_terminated_length": 222.0,
      "epoch": 0.03965014577259475,
      "grad_norm": 0.2832850515842438,
      "kl": 0.0014324188232421875,
      "learning_rate": 1e-06,
      "loss": 0.0401,
      "num_tokens": 2581097.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.32946476340293884,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4863404929637909,
      "step": 17
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1602.0,
      "completions/mean_length": 625.482177734375,
      "completions/mean_terminated_length": 529.9632568359375,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 0.04198250728862974,
      "grad_norm": 0.2542360723018646,
      "kl": 0.00144195556640625,
      "learning_rate": 1e-06,
      "loss": 0.0673,
      "num_tokens": 2743269.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.2027650624513626,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.5006201863288879,
      "step": 18
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2094.0,
      "completions/mean_length": 609.84375,
      "completions/mean_terminated_length": 530.2510986328125,
      "completions/min_length": 44.0,
      "completions/min_terminated_length": 44.0,
      "epoch": 0.044314868804664724,
      "grad_norm": 0.2551509737968445,
      "kl": 0.0028018951416015625,
      "learning_rate": 1e-06,
      "loss": 0.0185,
      "num_tokens": 2904186.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.1973431557416916,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.5006201863288879,
      "step": 19
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 2832.0,
      "completions/max_terminated_length": 2832.0,
      "completions/mean_length": 555.638427734375,
      "completions/mean_terminated_length": 555.638427734375,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 0.04664723032069971,
      "grad_norm": 0.25999799370765686,
      "kl": 0.0018978118896484375,
      "learning_rate": 1e-06,
      "loss": 0.0443,
      "num_tokens": 3046369.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.19990073144435883,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949929118156433,
      "step": 20
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1879.0,
      "completions/max_terminated_length": 1879.0,
      "completions/mean_length": 522.3973388671875,
      "completions/mean_terminated_length": 522.3973388671875,
      "completions/min_length": 19.0,
      "completions/min_terminated_length": 19.0,
      "epoch": 0.04897959183673469,
      "grad_norm": 0.2707967460155487,
      "kl": 0.0012912750244140625,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 3186754.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.2604704201221466,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.4904395043849945,
      "step": 21
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2004.0,
      "completions/mean_length": 579.9553833007812,
      "completions/mean_terminated_length": 532.2262573242188,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 0.05131195335276968,
      "grad_norm": 0.2654401361942291,
      "kl": 0.0014190673828125,
      "learning_rate": 1e-06,
      "loss": 0.0332,
      "num_tokens": 3339912.0,
      "reward": 0.59375,
      "reward_std": 0.2362651526927948,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 22
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2987.0,
      "completions/mean_length": 632.4910888671875,
      "completions/mean_terminated_length": 616.9596557617188,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 0.053644314868804666,
      "grad_norm": 0.2511657178401947,
      "kl": 0.0018367767333984375,
      "learning_rate": 1e-06,
      "loss": -0.0069,
      "num_tokens": 3503742.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.24949824810028076,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949929118156433,
      "step": 23
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3793.0,
      "completions/mean_length": 630.2142944335938,
      "completions/mean_terminated_length": 567.2000122070312,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 0.05597667638483965,
      "grad_norm": 0.25249868631362915,
      "kl": 0.0013904571533203125,
      "learning_rate": 1e-06,
      "loss": 0.0345,
      "num_tokens": 3666566.0,
      "reward": 0.4062500298023224,
      "reward_std": 0.23686854541301727,
      "rewards/verify_math_reward/mean": 0.40625,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 24
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3011.0,
      "completions/mean_length": 533.138427734375,
      "completions/mean_terminated_length": 517.1614379882812,
      "completions/min_length": 88.0,
      "completions/min_terminated_length": 88.0,
      "epoch": 0.05830903790087463,
      "grad_norm": 0.2844082713127136,
      "kl": 0.00157928466796875,
      "learning_rate": 1e-06,
      "loss": -0.0311,
      "num_tokens": 3808557.0,
      "reward": 0.566964328289032,
      "reward_std": 0.19538895785808563,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49660524725914,
      "step": 25
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1565.0,
      "completions/mean_length": 502.5223388671875,
      "completions/mean_terminated_length": 470.1486511230469,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 0.060641399416909623,
      "grad_norm": 0.2154368907213211,
      "kl": 0.0026454925537109375,
      "learning_rate": 1e-06,
      "loss": 0.0031,
      "num_tokens": 3940570.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.12791654467582703,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.48023054003715515,
      "step": 26
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1970.0,
      "completions/mean_length": 532.3928833007812,
      "completions/mean_terminated_length": 516.41259765625,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 0.0629737609329446,
      "grad_norm": 0.2610211968421936,
      "kl": 0.00170135498046875,
      "learning_rate": 1e-06,
      "loss": -0.0061,
      "num_tokens": 4087314.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.21612931787967682,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.4930652976036072,
      "step": 27
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2753.0,
      "completions/mean_length": 607.638427734375,
      "completions/mean_terminated_length": 576.2117309570312,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 0.0653061224489796,
      "grad_norm": 0.2755652964115143,
      "kl": 0.0019664764404296875,
      "learning_rate": 1e-06,
      "loss": 0.0204,
      "num_tokens": 4252377.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.2442418336868286,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 28
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2203.0,
      "completions/mean_length": 573.5535888671875,
      "completions/mean_terminated_length": 541.81982421875,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 0.06763848396501458,
      "grad_norm": 0.27797746658325195,
      "kl": 0.001743316650390625,
      "learning_rate": 1e-06,
      "loss": -0.0014,
      "num_tokens": 4398069.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.22935959696769714,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949929118156433,
      "step": 29
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1977.0,
      "completions/mean_length": 622.6116333007812,
      "completions/mean_terminated_length": 607.035888671875,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 0.06997084548104957,
      "grad_norm": 0.2739410996437073,
      "kl": 0.002223968505859375,
      "learning_rate": 1e-06,
      "loss": -0.007,
      "num_tokens": 4559310.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.25821956992149353,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 30
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1887.0,
      "completions/mean_length": 696.5982666015625,
      "completions/mean_terminated_length": 665.9729614257812,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 0.07230320699708455,
      "grad_norm": 0.23068802058696747,
      "kl": 0.0012111663818359375,
      "learning_rate": 1e-06,
      "loss": 0.0302,
      "num_tokens": 4733676.0,
      "reward": 0.486607164144516,
      "reward_std": 0.23656180500984192,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 31
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2028.0,
      "completions/mean_length": 565.3392944335938,
      "completions/mean_terminated_length": 533.5315551757812,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.07463556851311953,
      "grad_norm": 0.22200630605220795,
      "kl": 0.0016536712646484375,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 4877384.0,
      "reward": 0.65625,
      "reward_std": 0.18862184882164001,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4760226309299469,
      "step": 32
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3845.0,
      "completions/mean_length": 504.8660888671875,
      "completions/mean_terminated_length": 488.7623596191406,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 0.07696793002915452,
      "grad_norm": 0.25177422165870667,
      "kl": 0.0014896392822265625,
      "learning_rate": 1e-06,
      "loss": -0.0148,
      "num_tokens": 5011674.0,
      "reward": 0.7321428656578064,
      "reward_std": 0.18922802805900574,
      "rewards/verify_math_reward/mean": 0.7321428656578064,
      "rewards/verify_math_reward/std": 0.4438345432281494,
      "step": 33
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2071.0,
      "completions/mean_length": 642.0402221679688,
      "completions/mean_terminated_length": 546.97705078125,
      "completions/min_length": 184.0,
      "completions/min_terminated_length": 184.0,
      "epoch": 0.0793002915451895,
      "grad_norm": 0.2264726758003235,
      "kl": 0.0017852783203125,
      "learning_rate": 1e-06,
      "loss": 0.015,
      "num_tokens": 5174515.0,
      "reward": 0.4687500298023224,
      "reward_std": 0.20771192014217377,
      "rewards/verify_math_reward/mean": 0.46875,
      "rewards/verify_math_reward/std": 0.5001401305198669,
      "step": 34
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3352.0,
      "completions/mean_length": 732.1428833007812,
      "completions/mean_terminated_length": 670.9818115234375,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 0.08163265306122448,
      "grad_norm": 0.25507766008377075,
      "kl": 0.001220703125,
      "learning_rate": 1e-06,
      "loss": 0.0533,
      "num_tokens": 5362451.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.26181840896606445,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5010399222373962,
      "step": 35
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3513.0,
      "completions/mean_length": 766.3928833007812,
      "completions/mean_terminated_length": 658.9861450195312,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 0.08396501457725948,
      "grad_norm": 0.17161031067371368,
      "kl": 0.00157928466796875,
      "learning_rate": 1e-06,
      "loss": 0.0424,
      "num_tokens": 5558939.0,
      "reward": 0.4419642984867096,
      "reward_std": 0.15751104056835175,
      "rewards/verify_math_reward/mean": 0.4419642984867096,
      "rewards/verify_math_reward/std": 0.4977326989173889,
      "step": 36
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1510.0,
      "completions/mean_length": 483.55804443359375,
      "completions/mean_terminated_length": 451.0135192871094,
      "completions/min_length": 191.0,
      "completions/min_terminated_length": 191.0,
      "epoch": 0.08629737609329446,
      "grad_norm": 0.2417500913143158,
      "kl": 0.0022220611572265625,
      "learning_rate": 1e-06,
      "loss": 0.0166,
      "num_tokens": 5688400.0,
      "reward": 0.629464328289032,
      "reward_std": 0.20320014655590057,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4840298891067505,
      "step": 37
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2653.0,
      "completions/mean_length": 733.450927734375,
      "completions/mean_terminated_length": 656.6803588867188,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 0.08862973760932945,
      "grad_norm": 0.1672551929950714,
      "kl": 0.0013370513916015625,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 5878997.0,
      "reward": 0.3794642984867096,
      "reward_std": 0.15346980094909668,
      "rewards/verify_math_reward/mean": 0.3794642984867096,
      "rewards/verify_math_reward/std": 0.4863404929637909,
      "step": 38
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3617.0,
      "completions/mean_length": 714.4375610351562,
      "completions/mean_terminated_length": 572.8837280273438,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 0.09096209912536443,
      "grad_norm": 0.2452102154493332,
      "kl": 0.001861572265625,
      "learning_rate": 1e-06,
      "loss": 0.0675,
      "num_tokens": 6066143.0,
      "reward": 0.504464328289032,
      "reward_std": 0.18757328391075134,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5010998845100403,
      "step": 39
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2279.0,
      "completions/mean_length": 650.5267944335938,
      "completions/mean_terminated_length": 571.8629760742188,
      "completions/min_length": 198.0,
      "completions/min_terminated_length": 198.0,
      "epoch": 0.09329446064139942,
      "grad_norm": 0.27518537640571594,
      "kl": 0.0014820098876953125,
      "learning_rate": 1e-06,
      "loss": 0.068,
      "num_tokens": 6232541.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.34387195110321045,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5010399222373962,
      "step": 40
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1851.0,
      "completions/mean_length": 697.4955444335938,
      "completions/mean_terminated_length": 587.8663330078125,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 0.0956268221574344,
      "grad_norm": 0.2095831036567688,
      "kl": 0.0014362335205078125,
      "learning_rate": 1e-06,
      "loss": 0.0127,
      "num_tokens": 6410404.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.1976453959941864,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949929118156433,
      "step": 41
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2882.0,
      "completions/mean_length": 610.857177734375,
      "completions/mean_terminated_length": 579.45947265625,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 0.09795918367346938,
      "grad_norm": 0.24536412954330444,
      "kl": 0.00151824951171875,
      "learning_rate": 1e-06,
      "loss": 0.0564,
      "num_tokens": 6564876.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.2439379096031189,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49385640025138855,
      "step": 42
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1801.0,
      "completions/mean_length": 600.1205444335938,
      "completions/mean_terminated_length": 520.305908203125,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 0.10029154518950437,
      "grad_norm": 0.21419697999954224,
      "kl": 0.002147674560546875,
      "learning_rate": 1e-06,
      "loss": 0.0056,
      "num_tokens": 6722031.0,
      "reward": 0.4464285969734192,
      "reward_std": 0.14249484241008759,
      "rewards/verify_math_reward/mean": 0.4464285671710968,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 43
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2292.0,
      "completions/mean_length": 698.8616333007812,
      "completions/mean_terminated_length": 637.095458984375,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 0.10262390670553936,
      "grad_norm": 0.18829944729804993,
      "kl": 0.0019207000732421875,
      "learning_rate": 1e-06,
      "loss": 0.0009,
      "num_tokens": 6900928.0,
      "reward": 0.4151785969734192,
      "reward_std": 0.14097853004932404,
      "rewards/verify_math_reward/mean": 0.4151785671710968,
      "rewards/verify_math_reward/std": 0.49385640025138855,
      "step": 44
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2867.0,
      "completions/mean_length": 546.6517944335938,
      "completions/mean_terminated_length": 514.6756591796875,
      "completions/min_length": 66.0,
      "completions/min_terminated_length": 66.0,
      "epoch": 0.10495626822157435,
      "grad_norm": 0.27684906125068665,
      "kl": 0.0021114349365234375,
      "learning_rate": 1e-06,
      "loss": -0.0102,
      "num_tokens": 7052698.0,
      "reward": 0.504464328289032,
      "reward_std": 0.2086220532655716,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5010998845100403,
      "step": 45
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1926.0,
      "completions/mean_length": 635.3795166015625,
      "completions/mean_terminated_length": 540.1329956054688,
      "completions/min_length": 204.0,
      "completions/min_terminated_length": 204.0,
      "epoch": 0.10728862973760933,
      "grad_norm": 0.22097277641296387,
      "kl": 0.0013580322265625,
      "learning_rate": 1e-06,
      "loss": 0.079,
      "num_tokens": 7218647.0,
      "reward": 0.5,
      "reward_std": 0.19898781180381775,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5011197924613953,
      "step": 46
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1874.0,
      "completions/mean_length": 565.9285888671875,
      "completions/mean_terminated_length": 518.0090942382812,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 0.10962099125364431,
      "grad_norm": 0.26807278394699097,
      "kl": 0.003177642822265625,
      "learning_rate": 1e-06,
      "loss": -0.0057,
      "num_tokens": 7369015.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.1253589689731598,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 47
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2943.0,
      "completions/mean_length": 601.125,
      "completions/mean_terminated_length": 537.581787109375,
      "completions/min_length": 189.0,
      "completions/min_terminated_length": 189.0,
      "epoch": 0.1119533527696793,
      "grad_norm": 0.26417699456214905,
      "kl": 0.0020771026611328125,
      "learning_rate": 1e-06,
      "loss": 0.0212,
      "num_tokens": 7529603.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.22289641201496124,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.4828082025051117,
      "step": 48
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 2446.0,
      "completions/max_terminated_length": 2446.0,
      "completions/mean_length": 613.513427734375,
      "completions/mean_terminated_length": 613.513427734375,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 0.11428571428571428,
      "grad_norm": 0.2860516607761383,
      "kl": 0.0013675689697265625,
      "learning_rate": 1e-06,
      "loss": 0.0341,
      "num_tokens": 7691030.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.3531966507434845,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 49
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3034.0,
      "completions/mean_length": 611.0223388671875,
      "completions/mean_terminated_length": 579.6261596679688,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.11661807580174927,
      "grad_norm": 0.2596966624259949,
      "kl": 0.001728057861328125,
      "learning_rate": 1e-06,
      "loss": 0.033,
      "num_tokens": 7848939.0,
      "reward": 0.5,
      "reward_std": 0.20636393129825592,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5011197924613953,
      "step": 50
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1863.0,
      "completions/mean_length": 641.84375,
      "completions/mean_terminated_length": 626.3543090820312,
      "completions/min_length": 197.0,
      "completions/min_terminated_length": 197.0,
      "epoch": 0.11895043731778426,
      "grad_norm": 0.2556436061859131,
      "kl": 0.0018062591552734375,
      "learning_rate": 1e-06,
      "loss": 0.0178,
      "num_tokens": 8013008.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.231617733836174,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5010399222373962,
      "step": 51
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1638.0,
      "completions/mean_length": 558.8348388671875,
      "completions/mean_terminated_length": 510.81903076171875,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 0.12128279883381925,
      "grad_norm": 0.2848607003688812,
      "kl": 0.001552581787109375,
      "learning_rate": 1e-06,
      "loss": 0.0336,
      "num_tokens": 8160843.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.21447904407978058,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.4548610746860504,
      "step": 52
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1907.0,
      "completions/mean_length": 552.5670166015625,
      "completions/mean_terminated_length": 536.6771850585938,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 0.12361516034985423,
      "grad_norm": 0.27648499608039856,
      "kl": 0.00194549560546875,
      "learning_rate": 1e-06,
      "loss": -0.0131,
      "num_tokens": 8310106.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.21703942120075226,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49597999453544617,
      "step": 53
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3712.0,
      "completions/mean_length": 583.40625,
      "completions/mean_terminated_length": 551.7612915039062,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 0.1259475218658892,
      "grad_norm": 0.1887330263853073,
      "kl": 0.00136566162109375,
      "learning_rate": 1e-06,
      "loss": 0.0156,
      "num_tokens": 8466965.0,
      "reward": 0.535714328289032,
      "reward_std": 0.1561630368232727,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 54
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2235.0,
      "completions/mean_length": 552.1964721679688,
      "completions/mean_terminated_length": 487.76361083984375,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 0.1282798833819242,
      "grad_norm": 0.2779422998428345,
      "kl": 0.0018138885498046875,
      "learning_rate": 1e-06,
      "loss": 0.0098,
      "num_tokens": 8613369.0,
      "reward": 0.535714328289032,
      "reward_std": 0.2553524374961853,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 55
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1799.0,
      "completions/mean_length": 571.8616333007812,
      "completions/mean_terminated_length": 540.1126098632812,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 0.1306122448979592,
      "grad_norm": 0.28782251477241516,
      "kl": 0.002044677734375,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 8763202.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.2879851758480072,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.5006201863288879,
      "step": 56
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2805.0,
      "completions/mean_length": 741.4063110351562,
      "completions/mean_terminated_length": 695.8688354492188,
      "completions/min_length": 201.0,
      "completions/min_terminated_length": 201.0,
      "epoch": 0.13294460641399417,
      "grad_norm": 0.19218860566616058,
      "kl": 0.001308441162109375,
      "learning_rate": 1e-06,
      "loss": 0.0071,
      "num_tokens": 8953853.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.1827620565891266,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 57
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1504.0,
      "completions/mean_length": 535.7410888671875,
      "completions/mean_terminated_length": 503.66668701171875,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 0.13527696793002916,
      "grad_norm": 0.25593048334121704,
      "kl": 0.00180816650390625,
      "learning_rate": 1e-06,
      "loss": 0.0117,
      "num_tokens": 9096979.0,
      "reward": 0.625,
      "reward_std": 0.17885924875736237,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.4852071702480316,
      "step": 58
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2036.0,
      "completions/mean_length": 543.03125,
      "completions/mean_terminated_length": 527.0986938476562,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.13760932944606413,
      "grad_norm": 0.24300231039524078,
      "kl": 0.0019474029541015625,
      "learning_rate": 1e-06,
      "loss": 0.0096,
      "num_tokens": 9240882.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.23009862005710602,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 59
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1608.0,
      "completions/mean_length": 603.7410888671875,
      "completions/mean_terminated_length": 572.279296875,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 0.13994169096209913,
      "grad_norm": 0.17551957070827484,
      "kl": 0.0014858245849609375,
      "learning_rate": 1e-06,
      "loss": -0.0019,
      "num_tokens": 9399624.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.11498290300369263,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.4977326989173889,
      "step": 60
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1966.0,
      "completions/mean_length": 553.6741333007812,
      "completions/mean_terminated_length": 521.7612915039062,
      "completions/min_length": 199.0,
      "completions/min_terminated_length": 199.0,
      "epoch": 0.1422740524781341,
      "grad_norm": 0.25766316056251526,
      "kl": 0.0018138885498046875,
      "learning_rate": 1e-06,
      "loss": 0.0334,
      "num_tokens": 9548591.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.2413831353187561,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4645504951477051,
      "step": 61
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4028.0,
      "completions/mean_length": 737.0267944335938,
      "completions/mean_terminated_length": 660.337890625,
      "completions/min_length": 230.0,
      "completions/min_terminated_length": 230.0,
      "epoch": 0.1446064139941691,
      "grad_norm": 0.20428849756717682,
      "kl": 0.0013713836669921875,
      "learning_rate": 1e-06,
      "loss": 0.1037,
      "num_tokens": 9733637.0,
      "reward": 0.4062500298023224,
      "reward_std": 0.21643324196338654,
      "rewards/verify_math_reward/mean": 0.40625,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 62
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2675.0,
      "completions/mean_length": 612.6785888671875,
      "completions/mean_terminated_length": 565.3936767578125,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 0.1469387755102041,
      "grad_norm": 0.22185128927230835,
      "kl": 0.0013828277587890625,
      "learning_rate": 1e-06,
      "loss": 0.0328,
      "num_tokens": 9895173.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.17659832537174225,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49597999453544617,
      "step": 63
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1415.0,
      "completions/mean_length": 535.9330444335938,
      "completions/mean_terminated_length": 503.8603820800781,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 0.14927113702623906,
      "grad_norm": 0.22858451306819916,
      "kl": 0.0022106170654296875,
      "learning_rate": 1e-06,
      "loss": 0.0236,
      "num_tokens": 10037494.0,
      "reward": 0.598214328289032,
      "reward_std": 0.17269554734230042,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49135705828666687,
      "step": 64
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1542.0,
      "completions/mean_length": 576.7545166015625,
      "completions/mean_terminated_length": 545.049560546875,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 0.15160349854227406,
      "grad_norm": 0.24405136704444885,
      "kl": 0.00164031982421875,
      "learning_rate": 1e-06,
      "loss": 0.0253,
      "num_tokens": 10190239.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.21057626605033875,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49869707226753235,
      "step": 65
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1649.0,
      "completions/mean_length": 574.325927734375,
      "completions/mean_terminated_length": 558.5336303710938,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 0.15393586005830903,
      "grad_norm": 0.2724969685077667,
      "kl": 0.0016498565673828125,
      "learning_rate": 1e-06,
      "loss": 0.0181,
      "num_tokens": 10341360.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.3142729699611664,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.4977326989173889,
      "step": 66
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1764.0,
      "completions/mean_length": 570.6473388671875,
      "completions/mean_terminated_length": 538.8873901367188,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 0.15626822157434403,
      "grad_norm": 0.24138103425502777,
      "kl": 0.001644134521484375,
      "learning_rate": 1e-06,
      "loss": 0.0472,
      "num_tokens": 10488001.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.24649550020694733,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135848045349,
      "step": 67
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2006.0,
      "completions/mean_length": 631.0982666015625,
      "completions/mean_terminated_length": 584.0633544921875,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 0.158600583090379,
      "grad_norm": 0.2019968181848526,
      "kl": 0.0019588470458984375,
      "learning_rate": 1e-06,
      "loss": 0.0189,
      "num_tokens": 10650151.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.19208507239818573,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.5010998249053955,
      "step": 68
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2120.0,
      "completions/mean_length": 691.7500610351562,
      "completions/mean_terminated_length": 614.02734375,
      "completions/min_length": 14.0,
      "completions/min_terminated_length": 14.0,
      "epoch": 0.160932944606414,
      "grad_norm": 0.26620808243751526,
      "kl": 0.0018100738525390625,
      "learning_rate": 1e-06,
      "loss": 0.0137,
      "num_tokens": 10824815.0,
      "reward": 0.486607164144516,
      "reward_std": 0.3650873303413391,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 69
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2701.0,
      "completions/mean_length": 673.90625,
      "completions/mean_terminated_length": 595.7762451171875,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 0.16326530612244897,
      "grad_norm": 0.22031183540821075,
      "kl": 0.001644134521484375,
      "learning_rate": 1e-06,
      "loss": 0.0163,
      "num_tokens": 10998762.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.16878993809223175,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49869704246520996,
      "step": 70
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1726.0,
      "completions/mean_length": 592.5803833007812,
      "completions/mean_terminated_length": 528.8817749023438,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.16559766763848396,
      "grad_norm": 0.22658729553222656,
      "kl": 0.0015964508056640625,
      "learning_rate": 1e-06,
      "loss": 0.0264,
      "num_tokens": 11152276.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.17525310814380646,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 71
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3690.0,
      "completions/mean_length": 709.1473388671875,
      "completions/mean_terminated_length": 599.8939819335938,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 0.16793002915451896,
      "grad_norm": 0.21466176211833954,
      "kl": 0.001659393310546875,
      "learning_rate": 1e-06,
      "loss": 0.0379,
      "num_tokens": 11328877.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.19929735362529755,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49869707226753235,
      "step": 72
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2910.0,
      "completions/mean_length": 714.0267944335938,
      "completions/mean_terminated_length": 636.812744140625,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 0.17026239067055393,
      "grad_norm": 0.2443362921476364,
      "kl": 0.0016460418701171875,
      "learning_rate": 1e-06,
      "loss": 0.0826,
      "num_tokens": 11510291.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.29188069701194763,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.48743006587028503,
      "step": 73
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1566.0,
      "completions/mean_length": 580.294677734375,
      "completions/mean_terminated_length": 548.6216430664062,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 0.17259475218658893,
      "grad_norm": 0.2611260414123535,
      "kl": 0.001880645751953125,
      "learning_rate": 1e-06,
      "loss": 0.0733,
      "num_tokens": 11669589.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.23101434111595154,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.48023054003715515,
      "step": 74
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1727.0,
      "completions/mean_length": 557.6830444335938,
      "completions/mean_terminated_length": 509.651611328125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.1749271137026239,
      "grad_norm": 0.2751494348049164,
      "kl": 0.0023212432861328125,
      "learning_rate": 1e-06,
      "loss": 0.0422,
      "num_tokens": 11811990.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.23101432621479034,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47747132182121277,
      "step": 75
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3887.0,
      "completions/mean_length": 698.4152221679688,
      "completions/mean_terminated_length": 572.5787353515625,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 0.1772594752186589,
      "grad_norm": 0.29036015272140503,
      "kl": 0.001857757568359375,
      "learning_rate": 1e-06,
      "loss": 0.0056,
      "num_tokens": 11994627.0,
      "reward": 0.5625,
      "reward_std": 0.32599422335624695,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 76
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1819.0,
      "completions/mean_length": 604.28125,
      "completions/mean_terminated_length": 556.8823852539062,
      "completions/min_length": 102.0,
      "completions/min_terminated_length": 102.0,
      "epoch": 0.17959183673469387,
      "grad_norm": 0.2591899335384369,
      "kl": 0.0017299652099609375,
      "learning_rate": 1e-06,
      "loss": 0.0814,
      "num_tokens": 12149490.0,
      "reward": 0.7187500596046448,
      "reward_std": 0.25084346532821655,
      "rewards/verify_math_reward/mean": 0.71875,
      "rewards/verify_math_reward/std": 0.45061618089675903,
      "step": 77
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2687.0,
      "completions/mean_length": 528.4375,
      "completions/mean_terminated_length": 512.4395141601562,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 0.18192419825072886,
      "grad_norm": 0.20739056169986725,
      "kl": 0.0016880035400390625,
      "learning_rate": 1e-06,
      "loss": 0.0029,
      "num_tokens": 12291684.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.1684831976890564,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.4729849100112915,
      "step": 78
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4038.0,
      "completions/mean_length": 626.4152221679688,
      "completions/mean_terminated_length": 610.8565063476562,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 0.18425655976676386,
      "grad_norm": 0.24469514191150665,
      "kl": 0.003215789794921875,
      "learning_rate": 1e-06,
      "loss": 0.019,
      "num_tokens": 12459601.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.24467973411083221,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.4904395043849945,
      "step": 79
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1375.0,
      "completions/mean_length": 572.2277221679688,
      "completions/mean_terminated_length": 540.4819946289062,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 0.18658892128279883,
      "grad_norm": 0.24989625811576843,
      "kl": 0.002498626708984375,
      "learning_rate": 1e-06,
      "loss": 0.0756,
      "num_tokens": 12608020.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.18098174035549164,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 80
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4076.0,
      "completions/mean_length": 579.3839721679688,
      "completions/mean_terminated_length": 547.7026977539062,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 0.18892128279883383,
      "grad_norm": 0.3420480489730835,
      "kl": 0.00213623046875,
      "learning_rate": 1e-06,
      "loss": 0.01,
      "num_tokens": 12757106.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.24980498850345612,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.4788738489151001,
      "step": 81
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3218.0,
      "completions/mean_length": 696.6250610351562,
      "completions/mean_terminated_length": 650.4796752929688,
      "completions/min_length": 217.0,
      "completions/min_terminated_length": 217.0,
      "epoch": 0.1912536443148688,
      "grad_norm": 0.23296111822128296,
      "kl": 0.0013065338134765625,
      "learning_rate": 1e-06,
      "loss": 0.036,
      "num_tokens": 12934654.0,
      "reward": 0.424107164144516,
      "reward_std": 0.2513140141963959,
      "rewards/verify_math_reward/mean": 0.4241071343421936,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 82
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2243.0,
      "completions/mean_length": 478.73663330078125,
      "completions/mean_terminated_length": 462.5157165527344,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 0.1935860058309038,
      "grad_norm": 0.23368965089321136,
      "kl": 0.001613616943359375,
      "learning_rate": 1e-06,
      "loss": 0.0375,
      "num_tokens": 13062099.0,
      "reward": 0.723214328289032,
      "reward_std": 0.15872061252593994,
      "rewards/verify_math_reward/mean": 0.7232142686843872,
      "rewards/verify_math_reward/std": 0.4484116733074188,
      "step": 83
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3744.0,
      "completions/mean_length": 590.7678833007812,
      "completions/mean_terminated_length": 543.185546875,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 0.19591836734693877,
      "grad_norm": 0.2560170590877533,
      "kl": 0.001621246337890625,
      "learning_rate": 1e-06,
      "loss": 0.07,
      "num_tokens": 13215607.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.2296663522720337,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.4904395043849945,
      "step": 84
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2096.0,
      "completions/mean_length": 664.6830444335938,
      "completions/mean_terminated_length": 602.29541015625,
      "completions/min_length": 191.0,
      "completions/min_terminated_length": 191.0,
      "epoch": 0.19825072886297376,
      "grad_norm": 0.18460825085639954,
      "kl": 0.0015735626220703125,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 13385184.0,
      "reward": 0.5625,
      "reward_std": 0.139630526304245,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 85
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3385.0,
      "completions/mean_length": 741.6875610351562,
      "completions/mean_terminated_length": 696.1538696289062,
      "completions/min_length": 71.0,
      "completions/min_terminated_length": 71.0,
      "epoch": 0.20058309037900873,
      "grad_norm": 0.2350245714187622,
      "kl": 0.00148773193359375,
      "learning_rate": 1e-06,
      "loss": 0.0107,
      "num_tokens": 13578010.0,
      "reward": 0.4151785969734192,
      "reward_std": 0.17599214613437653,
      "rewards/verify_math_reward/mean": 0.4151785671710968,
      "rewards/verify_math_reward/std": 0.49385643005371094,
      "step": 86
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2418.0,
      "completions/mean_length": 644.90625,
      "completions/mean_terminated_length": 629.4305419921875,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 0.20291545189504373,
      "grad_norm": 0.27613499760627747,
      "kl": 0.00165557861328125,
      "learning_rate": 1e-06,
      "loss": 0.0377,
      "num_tokens": 13744445.0,
      "reward": 0.5625,
      "reward_std": 0.2765706479549408,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 87
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1825.0,
      "completions/mean_length": 580.6875,
      "completions/mean_terminated_length": 549.0180053710938,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 0.20524781341107873,
      "grad_norm": 0.19442224502563477,
      "kl": 0.0015697479248046875,
      "learning_rate": 1e-06,
      "loss": 0.0101,
      "num_tokens": 13900599.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.16457758843898773,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 88
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2011.0,
      "completions/mean_length": 640.9420166015625,
      "completions/mean_terminated_length": 594.040771484375,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 0.2075801749271137,
      "grad_norm": 0.22410327196121216,
      "kl": 0.0014171600341796875,
      "learning_rate": 1e-06,
      "loss": 0.022,
      "num_tokens": 14068834.0,
      "reward": 0.629464328289032,
      "reward_std": 0.16878992319107056,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4840298593044281,
      "step": 89
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2752.0,
      "completions/mean_length": 769.950927734375,
      "completions/mean_terminated_length": 662.6589965820312,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 0.2099125364431487,
      "grad_norm": 0.21688145399093628,
      "kl": 0.0013904571533203125,
      "learning_rate": 1e-06,
      "loss": 0.059,
      "num_tokens": 14264527.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.18532243371009827,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949926137924194,
      "step": 90
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1592.0,
      "completions/max_terminated_length": 1592.0,
      "completions/mean_length": 495.21429443359375,
      "completions/mean_terminated_length": 495.21429443359375,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 0.21224489795918366,
      "grad_norm": 0.25790855288505554,
      "kl": 0.0018939971923828125,
      "learning_rate": 1e-06,
      "loss": 0.0224,
      "num_tokens": 14393703.0,
      "reward": 0.754464328289032,
      "reward_std": 0.21222370862960815,
      "rewards/verify_math_reward/mean": 0.7544642686843872,
      "rewards/verify_math_reward/std": 0.43136832118034363,
      "step": 91
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2620.0,
      "completions/mean_length": 623.0178833007812,
      "completions/mean_terminated_length": 543.7260131835938,
      "completions/min_length": 54.0,
      "completions/min_terminated_length": 54.0,
      "epoch": 0.21457725947521866,
      "grad_norm": 0.27001768350601196,
      "kl": 0.0022411346435546875,
      "learning_rate": 1e-06,
      "loss": 0.0135,
      "num_tokens": 14551459.0,
      "reward": 0.65625,
      "reward_std": 0.252055823802948,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4760226309299469,
      "step": 92
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2954.0,
      "completions/mean_length": 676.3348388671875,
      "completions/mean_terminated_length": 645.5270385742188,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 0.21690962099125363,
      "grad_norm": 0.21039845049381256,
      "kl": 0.001399993896484375,
      "learning_rate": 1e-06,
      "loss": 0.0147,
      "num_tokens": 14722078.0,
      "reward": 0.535714328289032,
      "reward_std": 0.1814168244600296,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 93
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1650.0,
      "completions/mean_length": 643.388427734375,
      "completions/mean_terminated_length": 580.6136474609375,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.21924198250728863,
      "grad_norm": 0.1937643587589264,
      "kl": 0.0016651153564453125,
      "learning_rate": 1e-06,
      "loss": 0.0282,
      "num_tokens": 14890149.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.1668357253074646,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 94
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1527.0,
      "completions/mean_length": 605.794677734375,
      "completions/mean_terminated_length": 558.4163208007812,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 0.22157434402332363,
      "grad_norm": 0.2784527838230133,
      "kl": 0.0018215179443359375,
      "learning_rate": 1e-06,
      "loss": 0.0445,
      "num_tokens": 15045703.0,
      "reward": 0.598214328289032,
      "reward_std": 0.2530971169471741,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49135705828666687,
      "step": 95
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2129.0,
      "completions/mean_length": 695.3035888671875,
      "completions/mean_terminated_length": 585.6036987304688,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 0.2239067055393586,
      "grad_norm": 0.18985792994499207,
      "kl": 0.0014095306396484375,
      "learning_rate": 1e-06,
      "loss": 0.0325,
      "num_tokens": 15222819.0,
      "reward": 0.4687500298023224,
      "reward_std": 0.20905713737010956,
      "rewards/verify_math_reward/mean": 0.46875,
      "rewards/verify_math_reward/std": 0.5001401305198669,
      "step": 96
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2440.0,
      "completions/mean_length": 663.0982666015625,
      "completions/mean_terminated_length": 616.4977416992188,
      "completions/min_length": 122.0,
      "completions/min_terminated_length": 122.0,
      "epoch": 0.2262390670553936,
      "grad_norm": 0.19274626672267914,
      "kl": 0.0014247894287109375,
      "learning_rate": 1e-06,
      "loss": 0.0138,
      "num_tokens": 15393673.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.16818374395370483,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4991183876991272,
      "step": 97
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2857.0,
      "completions/mean_length": 812.1160888671875,
      "completions/mean_terminated_length": 690.49072265625,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 0.22857142857142856,
      "grad_norm": 0.21691066026687622,
      "kl": 0.00139617919921875,
      "learning_rate": 1e-06,
      "loss": -0.0172,
      "num_tokens": 15598387.0,
      "reward": 0.4419642984867096,
      "reward_std": 0.18953195214271545,
      "rewards/verify_math_reward/mean": 0.4419642984867096,
      "rewards/verify_math_reward/std": 0.4977326989173889,
      "step": 98
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1668.0,
      "completions/mean_length": 550.8705444335938,
      "completions/mean_terminated_length": 518.9324340820312,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 0.23090379008746356,
      "grad_norm": 0.2327047884464264,
      "kl": 0.00212860107421875,
      "learning_rate": 1e-06,
      "loss": 0.0458,
      "num_tokens": 15741534.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.1973431557416916,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49385640025138855,
      "step": 99
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2131.0,
      "completions/mean_length": 659.9152221679688,
      "completions/mean_terminated_length": 613.2715454101562,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 0.23323615160349853,
      "grad_norm": 0.18836602568626404,
      "kl": 0.0020809173583984375,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 15909715.0,
      "reward": 0.504464328289032,
      "reward_std": 0.204110249876976,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5010998845100403,
      "step": 100
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1505.0,
      "completions/mean_length": 629.607177734375,
      "completions/mean_terminated_length": 614.0628051757812,
      "completions/min_length": 187.0,
      "completions/min_terminated_length": 187.0,
      "epoch": 0.23556851311953353,
      "grad_norm": 0.23415729403495789,
      "kl": 0.001430511474609375,
      "learning_rate": 1e-06,
      "loss": 0.0148,
      "num_tokens": 16071699.0,
      "reward": 0.424107164144516,
      "reward_std": 0.21672989428043365,
      "rewards/verify_math_reward/mean": 0.4241071343421936,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 101
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3279.0,
      "completions/mean_length": 678.6428833007812,
      "completions/mean_terminated_length": 600.6209716796875,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 0.23790087463556853,
      "grad_norm": 0.2581215500831604,
      "kl": 0.0017566680908203125,
      "learning_rate": 1e-06,
      "loss": 0.0191,
      "num_tokens": 16247475.0,
      "reward": 0.535714328289032,
      "reward_std": 0.22650256752967834,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 102
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1467.0,
      "completions/mean_length": 532.5714721679688,
      "completions/mean_terminated_length": 516.5919799804688,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 0.2402332361516035,
      "grad_norm": 0.23102478682994843,
      "kl": 0.002101898193359375,
      "learning_rate": 1e-06,
      "loss": 0.0228,
      "num_tokens": 16386339.0,
      "reward": 0.566964328289032,
      "reward_std": 0.163971409201622,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49660524725914,
      "step": 103
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2806.0,
      "completions/mean_length": 620.6875,
      "completions/mean_terminated_length": 589.37841796875,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 0.2425655976676385,
      "grad_norm": 0.25336551666259766,
      "kl": 0.0013637542724609375,
      "learning_rate": 1e-06,
      "loss": 0.0506,
      "num_tokens": 16548565.0,
      "reward": 0.4776785969734192,
      "reward_std": 0.27609726786613464,
      "rewards/verify_math_reward/mean": 0.4776785671710968,
      "rewards/verify_math_reward/std": 0.5006202459335327,
      "step": 104
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1523.0,
      "completions/mean_length": 560.0,
      "completions/mean_terminated_length": 544.1435546875,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 0.24489795918367346,
      "grad_norm": 0.25496363639831543,
      "kl": 0.0017795562744140625,
      "learning_rate": 1e-06,
      "loss": 0.0272,
      "num_tokens": 16693973.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.24198931455612183,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.4904395341873169,
      "step": 105
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3834.0,
      "completions/mean_length": 580.0982666015625,
      "completions/mean_terminated_length": 532.37109375,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 0.24723032069970846,
      "grad_norm": 0.2990919053554535,
      "kl": 0.0017261505126953125,
      "learning_rate": 1e-06,
      "loss": 0.0356,
      "num_tokens": 16847707.0,
      "reward": 0.625,
      "reward_std": 0.27383914589881897,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.4852071702480316,
      "step": 106
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1318.0,
      "completions/mean_length": 589.3527221679688,
      "completions/mean_terminated_length": 557.7612915039062,
      "completions/min_length": 218.0,
      "completions/min_terminated_length": 218.0,
      "epoch": 0.24956268221574343,
      "grad_norm": 0.28804782032966614,
      "kl": 0.00211334228515625,
      "learning_rate": 1e-06,
      "loss": 0.0051,
      "num_tokens": 17003810.0,
      "reward": 0.4776785969734192,
      "reward_std": 0.20905713737010956,
      "rewards/verify_math_reward/mean": 0.4776785671710968,
      "rewards/verify_math_reward/std": 0.5006202459335327,
      "step": 107
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1866.0,
      "completions/mean_length": 706.8928833007812,
      "completions/mean_terminated_length": 581.370361328125,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 0.2518950437317784,
      "grad_norm": 0.1757209748029709,
      "kl": 0.0017375946044921875,
      "learning_rate": 1e-06,
      "loss": 0.0351,
      "num_tokens": 17182426.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.1202336996793747,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.4930652976036072,
      "step": 108
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2962.0,
      "completions/mean_length": 682.0848388671875,
      "completions/mean_terminated_length": 635.7421264648438,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 0.2542274052478134,
      "grad_norm": 0.29106491804122925,
      "kl": 0.0016193389892578125,
      "learning_rate": 1e-06,
      "loss": 0.0085,
      "num_tokens": 17363301.0,
      "reward": 0.4330357313156128,
      "reward_std": 0.24424462020397186,
      "rewards/verify_math_reward/mean": 0.4330357015132904,
      "rewards/verify_math_reward/std": 0.49660524725914,
      "step": 109
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1798.0,
      "completions/mean_length": 519.4285888671875,
      "completions/mean_terminated_length": 503.3901672363281,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 0.2565597667638484,
      "grad_norm": 0.26929226517677307,
      "kl": 0.0020198822021484375,
      "learning_rate": 1e-06,
      "loss": 0.0284,
      "num_tokens": 17500813.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.22229304909706116,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.4930652976036072,
      "step": 110
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1765.0,
      "completions/mean_length": 566.1517944335938,
      "completions/mean_terminated_length": 518.2352905273438,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 0.2588921282798834,
      "grad_norm": 0.19264018535614014,
      "kl": 0.0015964508056640625,
      "learning_rate": 1e-06,
      "loss": -0.0044,
      "num_tokens": 17646687.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.16397422552108765,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.49460577964782715,
      "step": 111
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1847.0,
      "completions/mean_length": 661.1205444335938,
      "completions/mean_terminated_length": 598.6681518554688,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 0.2612244897959184,
      "grad_norm": 0.24941198527812958,
      "kl": 0.0016078948974609375,
      "learning_rate": 1e-06,
      "loss": 0.062,
      "num_tokens": 17816466.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.25040388107299805,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 112
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3776.0,
      "completions/mean_length": 640.3348388671875,
      "completions/mean_terminated_length": 561.4383544921875,
      "completions/min_length": 195.0,
      "completions/min_terminated_length": 195.0,
      "epoch": 0.26355685131195333,
      "grad_norm": 0.2555276155471802,
      "kl": 0.0020389556884765625,
      "learning_rate": 1e-06,
      "loss": 0.0535,
      "num_tokens": 17979205.0,
      "reward": 0.535714328289032,
      "reward_std": 0.2540072202682495,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 113
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2324.0,
      "completions/mean_length": 600.3214721679688,
      "completions/mean_terminated_length": 568.828857421875,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 0.26588921282798833,
      "grad_norm": 0.21167892217636108,
      "kl": 0.0017452239990234375,
      "learning_rate": 1e-06,
      "loss": 0.0304,
      "num_tokens": 18137877.0,
      "reward": 0.4732142984867096,
      "reward_std": 0.19086988270282745,
      "rewards/verify_math_reward/mean": 0.4732142984867096,
      "rewards/verify_math_reward/std": 0.5004002451896667,
      "step": 114
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1537.0,
      "completions/max_terminated_length": 1537.0,
      "completions/mean_length": 576.1473388671875,
      "completions/mean_terminated_length": 576.1473388671875,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 0.26822157434402333,
      "grad_norm": 0.2546173632144928,
      "kl": 0.0017337799072265625,
      "learning_rate": 1e-06,
      "loss": -0.0061,
      "num_tokens": 18293206.0,
      "reward": 0.4330357313156128,
      "reward_std": 0.2455853670835495,
      "rewards/verify_math_reward/mean": 0.4330357015132904,
      "rewards/verify_math_reward/std": 0.49660518765449524,
      "step": 115
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2655.0,
      "completions/mean_length": 715.8795166015625,
      "completions/mean_terminated_length": 622.8485717773438,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 0.2705539358600583,
      "grad_norm": 0.2411630004644394,
      "kl": 0.0017833709716796875,
      "learning_rate": 1e-06,
      "loss": 0.0375,
      "num_tokens": 18472771.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.19629742205142975,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.4930652976036072,
      "step": 116
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2640.0,
      "completions/mean_length": 624.0491333007812,
      "completions/mean_terminated_length": 560.9227294921875,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 0.27288629737609327,
      "grad_norm": 0.2885005474090576,
      "kl": 0.0020599365234375,
      "learning_rate": 1e-06,
      "loss": 0.0867,
      "num_tokens": 18644870.0,
      "reward": 0.4375000298023224,
      "reward_std": 0.2346104085445404,
      "rewards/verify_math_reward/mean": 0.4375,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 117
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1629.0,
      "completions/mean_length": 527.6473388671875,
      "completions/mean_terminated_length": 462.7681579589844,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 0.27521865889212827,
      "grad_norm": 0.24338369071483612,
      "kl": 0.0029697418212890625,
      "learning_rate": 1e-06,
      "loss": 0.0836,
      "num_tokens": 18784303.0,
      "reward": 0.7053571939468384,
      "reward_std": 0.17946264147758484,
      "rewards/verify_math_reward/mean": 0.7053571343421936,
      "rewards/verify_math_reward/std": 0.45690304040908813,
      "step": 118
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1669.0,
      "completions/mean_length": 579.0223388671875,
      "completions/mean_terminated_length": 547.3378295898438,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 0.27755102040816326,
      "grad_norm": 0.27034732699394226,
      "kl": 0.0016078948974609375,
      "learning_rate": 1e-06,
      "loss": 0.0225,
      "num_tokens": 18935492.0,
      "reward": 0.566964328289032,
      "reward_std": 0.25010165572166443,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49660524725914,
      "step": 119
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2419.0,
      "completions/mean_length": 606.669677734375,
      "completions/mean_terminated_length": 575.2342529296875,
      "completions/min_length": 208.0,
      "completions/min_terminated_length": 208.0,
      "epoch": 0.27988338192419826,
      "grad_norm": 0.18903683125972748,
      "kl": 0.0017871856689453125,
      "learning_rate": 1e-06,
      "loss": 0.0183,
      "num_tokens": 19091674.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.14640043675899506,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49869707226753235,
      "step": 120
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2976.0,
      "completions/mean_length": 726.3438110351562,
      "completions/mean_terminated_length": 633.6008911132812,
      "completions/min_length": 188.0,
      "completions/min_terminated_length": 188.0,
      "epoch": 0.28221574344023326,
      "grad_norm": 0.2031654566526413,
      "kl": 0.001880645751953125,
      "learning_rate": 1e-06,
      "loss": 0.0537,
      "num_tokens": 19273375.0,
      "reward": 0.566964328289032,
      "reward_std": 0.18862183392047882,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.4966052174568176,
      "step": 121
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3289.0,
      "completions/mean_length": 618.388427734375,
      "completions/mean_terminated_length": 571.1810302734375,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 0.2845481049562682,
      "grad_norm": 0.2487492859363556,
      "kl": 0.001918792724609375,
      "learning_rate": 1e-06,
      "loss": 0.0103,
      "num_tokens": 19432982.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.21282710134983063,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 122
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2278.0,
      "completions/mean_length": 732.3482666015625,
      "completions/mean_terminated_length": 655.552490234375,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 0.2868804664723032,
      "grad_norm": 0.22322975099086761,
      "kl": 0.00141143798828125,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 19621012.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.23417532444000244,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 123
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1437.0,
      "completions/mean_length": 578.107177734375,
      "completions/mean_terminated_length": 562.3318481445312,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 0.2892128279883382,
      "grad_norm": 0.21703268587589264,
      "kl": 0.0018138885498046875,
      "learning_rate": 1e-06,
      "loss": -0.0076,
      "num_tokens": 19771316.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.2346104085445404,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.4930652976036072,
      "step": 124
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2791.0,
      "completions/mean_length": 568.9330444335938,
      "completions/mean_terminated_length": 537.1576538085938,
      "completions/min_length": 118.0,
      "completions/min_terminated_length": 118.0,
      "epoch": 0.2915451895043732,
      "grad_norm": 0.26182636618614197,
      "kl": 0.002227783203125,
      "learning_rate": 1e-06,
      "loss": 0.0242,
      "num_tokens": 19921253.0,
      "reward": 0.625,
      "reward_std": 0.23357194662094116,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.4852071702480316,
      "step": 125
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1842.0,
      "completions/mean_length": 543.5625,
      "completions/mean_terminated_length": 527.63232421875,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.2938775510204082,
      "grad_norm": 0.3207721412181854,
      "kl": 0.00211334228515625,
      "learning_rate": 1e-06,
      "loss": 0.0397,
      "num_tokens": 20062099.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.30073481798171997,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48947930335998535,
      "step": 126
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2994.0,
      "completions/mean_length": 616.7767944335938,
      "completions/mean_terminated_length": 601.1749267578125,
      "completions/min_length": 5.0,
      "completions/min_terminated_length": 5.0,
      "epoch": 0.29620991253644313,
      "grad_norm": 0.21629698574543,
      "kl": 0.002124786376953125,
      "learning_rate": 1e-06,
      "loss": -0.0165,
      "num_tokens": 20223609.0,
      "reward": 0.4642857313156128,
      "reward_std": 0.16683855652809143,
      "rewards/verify_math_reward/mean": 0.4642857015132904,
      "rewards/verify_math_reward/std": 0.49983978271484375,
      "step": 127
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1894.0,
      "completions/max_terminated_length": 1894.0,
      "completions/mean_length": 605.419677734375,
      "completions/mean_terminated_length": 605.419677734375,
      "completions/min_length": 221.0,
      "completions/min_terminated_length": 221.0,
      "epoch": 0.29854227405247813,
      "grad_norm": 0.2856070101261139,
      "kl": 0.0027828216552734375,
      "learning_rate": 1e-06,
      "loss": 0.0249,
      "num_tokens": 20385127.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.3123260736465454,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 128
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1417.0,
      "completions/max_terminated_length": 1417.0,
      "completions/mean_length": 468.0625305175781,
      "completions/mean_terminated_length": 468.0625305175781,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 0.3008746355685131,
      "grad_norm": 0.2847056984901428,
      "kl": 0.00231170654296875,
      "learning_rate": 1e-06,
      "loss": 0.0068,
      "num_tokens": 20510829.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.2102695107460022,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.4977326989173889,
      "step": 129
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3461.0,
      "completions/mean_length": 590.3125,
      "completions/mean_terminated_length": 542.7239990234375,
      "completions/min_length": 8.0,
      "completions/min_terminated_length": 8.0,
      "epoch": 0.3032069970845481,
      "grad_norm": 0.22741061449050903,
      "kl": 0.0025844573974609375,
      "learning_rate": 1e-06,
      "loss": 0.0036,
      "num_tokens": 20662627.0,
      "reward": 0.4598214626312256,
      "reward_std": 0.2215484380722046,
      "rewards/verify_math_reward/mean": 0.4598214328289032,
      "rewards/verify_math_reward/std": 0.49949926137924194,
      "step": 130
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1345.0,
      "completions/mean_length": 511.99554443359375,
      "completions/mean_terminated_length": 479.70721435546875,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 0.30553935860058307,
      "grad_norm": 0.24569222331047058,
      "kl": 0.002933502197265625,
      "learning_rate": 1e-06,
      "loss": 0.0339,
      "num_tokens": 20795994.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.16818653047084808,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.4729849100112915,
      "step": 131
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2050.0,
      "completions/mean_length": 615.7366333007812,
      "completions/mean_terminated_length": 536.2785034179688,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 0.30787172011661806,
      "grad_norm": 0.30736494064331055,
      "kl": 0.0018939971923828125,
      "learning_rate": 1e-06,
      "loss": 0.0255,
      "num_tokens": 20961351.0,
      "reward": 0.4732142984867096,
      "reward_std": 0.24589939415454865,
      "rewards/verify_math_reward/mean": 0.4732142984867096,
      "rewards/verify_math_reward/std": 0.5004002451896667,
      "step": 132
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2477.0,
      "completions/mean_length": 518.5089721679688,
      "completions/mean_terminated_length": 486.279296875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.31020408163265306,
      "grad_norm": 0.27623245120048523,
      "kl": 0.002376556396484375,
      "learning_rate": 1e-06,
      "loss": 0.027,
      "num_tokens": 21097609.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.19013814628124237,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.48023054003715515,
      "step": 133
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4057.0,
      "completions/mean_length": 599.4777221679688,
      "completions/mean_terminated_length": 552.0136108398438,
      "completions/min_length": 216.0,
      "completions/min_terminated_length": 216.0,
      "epoch": 0.31253644314868806,
      "grad_norm": 0.26248371601104736,
      "kl": 0.0019016265869140625,
      "learning_rate": 1e-06,
      "loss": 0.0744,
      "num_tokens": 21252732.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.2783453166484833,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 134
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1794.0,
      "completions/mean_length": 625.3839721679688,
      "completions/mean_terminated_length": 609.8206787109375,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 0.31486880466472306,
      "grad_norm": 0.24094338715076447,
      "kl": 0.0017375946044921875,
      "learning_rate": 1e-06,
      "loss": 0.0079,
      "num_tokens": 21413170.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.2057577222585678,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 135
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1678.0,
      "completions/mean_length": 616.8527221679688,
      "completions/mean_terminated_length": 601.2511596679688,
      "completions/min_length": 211.0,
      "completions/min_terminated_length": 211.0,
      "epoch": 0.317201166180758,
      "grad_norm": 0.2017364650964737,
      "kl": 0.00182342529296875,
      "learning_rate": 1e-06,
      "loss": 0.0144,
      "num_tokens": 21573433.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.16592560708522797,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.5010998249053955,
      "step": 136
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3052.0,
      "completions/mean_length": 620.1964721679688,
      "completions/mean_terminated_length": 604.6099243164062,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 0.319533527696793,
      "grad_norm": 0.2938466966152191,
      "kl": 0.0016841888427734375,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 21736213.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.2657212018966675,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 137
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3395.0,
      "completions/mean_length": 572.7232666015625,
      "completions/mean_terminated_length": 556.9237670898438,
      "completions/min_length": 232.0,
      "completions/min_terminated_length": 232.0,
      "epoch": 0.321865889212828,
      "grad_norm": 0.21208110451698303,
      "kl": 0.0015926361083984375,
      "learning_rate": 1e-06,
      "loss": 0.0209,
      "num_tokens": 21885079.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.18397442996501923,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.4930652976036072,
      "step": 138
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2273.0,
      "completions/mean_length": 668.8705444335938,
      "completions/mean_terminated_length": 574.5458374023438,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.324198250728863,
      "grad_norm": 0.17574279010295868,
      "kl": 0.0015773773193359375,
      "learning_rate": 1e-06,
      "loss": 0.0425,
      "num_tokens": 22054858.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.16006861627101898,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.4663354754447937,
      "step": 139
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1791.0,
      "completions/mean_length": 630.0625,
      "completions/mean_terminated_length": 550.9314575195312,
      "completions/min_length": 119.0,
      "completions/min_terminated_length": 119.0,
      "epoch": 0.32653061224489793,
      "grad_norm": 0.26903802156448364,
      "kl": 0.0017642974853515625,
      "learning_rate": 1e-06,
      "loss": 0.068,
      "num_tokens": 22215264.0,
      "reward": 0.5,
      "reward_std": 0.2637726068496704,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5011197924613953,
      "step": 140
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1723.0,
      "completions/mean_length": 607.9285888671875,
      "completions/mean_terminated_length": 544.5090942382812,
      "completions/min_length": 216.0,
      "completions/min_terminated_length": 216.0,
      "epoch": 0.32886297376093293,
      "grad_norm": 0.2991168200969696,
      "kl": 0.00200653076171875,
      "learning_rate": 1e-06,
      "loss": 0.0203,
      "num_tokens": 22376632.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.24363845586776733,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 141
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3375.0,
      "completions/mean_length": 655.3616333007812,
      "completions/mean_terminated_length": 608.6561279296875,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 0.33119533527696793,
      "grad_norm": 0.25582826137542725,
      "kl": 0.0018901824951171875,
      "learning_rate": 1e-06,
      "loss": 0.0388,
      "num_tokens": 22544921.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.23326799273490906,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949929118156433,
      "step": 142
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3335.0,
      "completions/mean_length": 616.21875,
      "completions/mean_terminated_length": 584.869384765625,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 0.3335276967930029,
      "grad_norm": 0.24448157846927643,
      "kl": 0.0015926361083984375,
      "learning_rate": 1e-06,
      "loss": 0.0388,
      "num_tokens": 22703818.0,
      "reward": 0.5625,
      "reward_std": 0.18562637269496918,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 143
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3484.0,
      "completions/mean_length": 667.4598388671875,
      "completions/mean_terminated_length": 620.9185791015625,
      "completions/min_length": 202.0,
      "completions/min_terminated_length": 202.0,
      "epoch": 0.3358600583090379,
      "grad_norm": 0.2597779631614685,
      "kl": 0.0016498565673828125,
      "learning_rate": 1e-06,
      "loss": 0.0698,
      "num_tokens": 22878609.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.25401005148887634,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 144
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3756.0,
      "completions/mean_length": 655.3839721679688,
      "completions/mean_terminated_length": 624.3873901367188,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 0.33819241982507287,
      "grad_norm": 0.2341889888048172,
      "kl": 0.0018711090087890625,
      "learning_rate": 1e-06,
      "loss": 0.0053,
      "num_tokens": 23045887.0,
      "reward": 0.455357164144516,
      "reward_std": 0.19343754649162292,
      "rewards/verify_math_reward/mean": 0.4553571343421936,
      "rewards/verify_math_reward/std": 0.4991183578968048,
      "step": 145
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1715.0,
      "completions/mean_length": 602.3660888671875,
      "completions/mean_terminated_length": 489.668212890625,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 0.34052478134110786,
      "grad_norm": 0.28709134459495544,
      "kl": 0.00226593017578125,
      "learning_rate": 1e-06,
      "loss": 0.0675,
      "num_tokens": 23198337.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.26121222972869873,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49385640025138855,
      "step": 146
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3598.0,
      "completions/mean_length": 759.6205444335938,
      "completions/mean_terminated_length": 603.7149047851562,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.34285714285714286,
      "grad_norm": 0.20393072068691254,
      "kl": 0.0019683837890625,
      "learning_rate": 1e-06,
      "loss": 0.0278,
      "num_tokens": 23391076.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.1720893532037735,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 147
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1648.0,
      "completions/mean_length": 521.03125,
      "completions/mean_terminated_length": 472.5022888183594,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 0.34518950437317786,
      "grad_norm": 0.30667921900749207,
      "kl": 0.00206756591796875,
      "learning_rate": 1e-06,
      "loss": 0.0489,
      "num_tokens": 23523939.0,
      "reward": 0.625,
      "reward_std": 0.23296292126178741,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.4852071702480316,
      "step": 148
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3738.0,
      "completions/mean_length": 623.5178833007812,
      "completions/mean_terminated_length": 560.3817749023438,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 0.34752186588921286,
      "grad_norm": 0.23010538518428802,
      "kl": 0.0018062591552734375,
      "learning_rate": 1e-06,
      "loss": 0.0152,
      "num_tokens": 23689143.0,
      "reward": 0.4642857313156128,
      "reward_std": 0.22289641201496124,
      "rewards/verify_math_reward/mean": 0.4642857015132904,
      "rewards/verify_math_reward/std": 0.49983978271484375,
      "step": 149
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1672.0,
      "completions/mean_length": 587.0803833007812,
      "completions/mean_terminated_length": 539.447998046875,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 0.3498542274052478,
      "grad_norm": 0.31920623779296875,
      "kl": 0.0019931793212890625,
      "learning_rate": 1e-06,
      "loss": 0.0181,
      "num_tokens": 23841681.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.2663346827030182,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 150
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1792.0,
      "completions/mean_length": 583.2902221679688,
      "completions/mean_terminated_length": 551.6441650390625,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 0.3521865889212828,
      "grad_norm": 0.22783829271793365,
      "kl": 0.0017986297607421875,
      "learning_rate": 1e-06,
      "loss": 0.0051,
      "num_tokens": 23994498.0,
      "reward": 0.53125,
      "reward_std": 0.1931336373090744,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.5001401305198669,
      "step": 151
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3782.0,
      "completions/mean_length": 762.4152221679688,
      "completions/mean_terminated_length": 606.6401977539062,
      "completions/min_length": 133.0,
      "completions/min_terminated_length": 133.0,
      "epoch": 0.3545189504373178,
      "grad_norm": 0.1953461915254593,
      "kl": 0.0016193389892578125,
      "learning_rate": 1e-06,
      "loss": 0.0531,
      "num_tokens": 24185887.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.18306151032447815,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4884762763977051,
      "step": 152
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1827.0,
      "completions/mean_length": 547.544677734375,
      "completions/mean_terminated_length": 531.63232421875,
      "completions/min_length": 51.0,
      "completions/min_terminated_length": 51.0,
      "epoch": 0.3568513119533528,
      "grad_norm": 0.2813705503940582,
      "kl": 0.0021514892578125,
      "learning_rate": 1e-06,
      "loss": 0.0244,
      "num_tokens": 24331713.0,
      "reward": 0.486607164144516,
      "reward_std": 0.2268020212650299,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 153
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2735.0,
      "completions/mean_length": 689.7678833007812,
      "completions/mean_terminated_length": 579.889404296875,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 0.35918367346938773,
      "grad_norm": 0.23221136629581451,
      "kl": 0.001651763916015625,
      "learning_rate": 1e-06,
      "loss": 0.0972,
      "num_tokens": 24516269.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.2346103936433792,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 154
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1829.0,
      "completions/mean_length": 539.294677734375,
      "completions/mean_terminated_length": 507.25225830078125,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 0.36151603498542273,
      "grad_norm": 0.27546215057373047,
      "kl": 0.00171661376953125,
      "learning_rate": 1e-06,
      "loss": -0.0212,
      "num_tokens": 24655727.0,
      "reward": 0.660714328289032,
      "reward_std": 0.2150852382183075,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4745272994041443,
      "step": 155
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1829.0,
      "completions/mean_length": 639.1964721679688,
      "completions/mean_terminated_length": 560.27392578125,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 0.3638483965014577,
      "grad_norm": 0.21887610852718353,
      "kl": 0.0019931793212890625,
      "learning_rate": 1e-06,
      "loss": 0.0176,
      "num_tokens": 24822891.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.2038063406944275,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5010399222373962,
      "step": 156
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1685.0,
      "completions/mean_length": 576.6027221679688,
      "completions/mean_terminated_length": 512.6136474609375,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 0.3661807580174927,
      "grad_norm": 0.24248209595680237,
      "kl": 0.0025463104248046875,
      "learning_rate": 1e-06,
      "loss": 0.0055,
      "num_tokens": 24977922.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.1690966635942459,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49385640025138855,
      "step": 157
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1617.0,
      "completions/mean_length": 608.7410888671875,
      "completions/mean_terminated_length": 545.3363647460938,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 0.3685131195335277,
      "grad_norm": 0.21610601246356964,
      "kl": 0.0019626617431640625,
      "learning_rate": 1e-06,
      "loss": 0.0501,
      "num_tokens": 25141424.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.16804811358451843,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949929118156433,
      "step": 158
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 2173.0,
      "completions/max_terminated_length": 2173.0,
      "completions/mean_length": 554.3527221679688,
      "completions/mean_terminated_length": 554.3527221679688,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 0.37084548104956266,
      "grad_norm": 0.2853037118911743,
      "kl": 0.002079010009765625,
      "learning_rate": 1e-06,
      "loss": 0.0133,
      "num_tokens": 25286391.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.28707224130630493,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 159
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1519.0,
      "completions/max_terminated_length": 1519.0,
      "completions/mean_length": 564.6830444335938,
      "completions/mean_terminated_length": 564.6830444335938,
      "completions/min_length": 207.0,
      "completions/min_terminated_length": 207.0,
      "epoch": 0.37317784256559766,
      "grad_norm": 0.2480822205543518,
      "kl": 0.0020084381103515625,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 25441424.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.23417532444000244,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5010399222373962,
      "step": 160
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2044.0,
      "completions/mean_length": 603.9285888671875,
      "completions/mean_terminated_length": 572.468505859375,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 0.37551020408163266,
      "grad_norm": 0.2697044610977173,
      "kl": 0.0019435882568359375,
      "learning_rate": 1e-06,
      "loss": 0.0112,
      "num_tokens": 25606600.0,
      "reward": 0.424107164144516,
      "reward_std": 0.33350035548210144,
      "rewards/verify_math_reward/mean": 0.4241071343421936,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 161
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4052.0,
      "completions/mean_length": 888.4553833007812,
      "completions/mean_terminated_length": 738.570068359375,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 0.37784256559766766,
      "grad_norm": 0.21930626034736633,
      "kl": 0.00145721435546875,
      "learning_rate": 1e-06,
      "loss": 0.0089,
      "num_tokens": 25828246.0,
      "reward": 0.4776785969734192,
      "reward_std": 0.17885644733905792,
      "rewards/verify_math_reward/mean": 0.4776785671710968,
      "rewards/verify_math_reward/std": 0.5006202459335327,
      "step": 162
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2992.0,
      "completions/mean_length": 640.8527221679688,
      "completions/mean_terminated_length": 578.0317993164062,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 0.3801749271137026,
      "grad_norm": 0.20078200101852417,
      "kl": 0.0017642974853515625,
      "learning_rate": 1e-06,
      "loss": 0.0006,
      "num_tokens": 26001229.0,
      "reward": 0.4375000298023224,
      "reward_std": 0.1979493498802185,
      "rewards/verify_math_reward/mean": 0.4375,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 163
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1622.0,
      "completions/mean_length": 548.919677734375,
      "completions/mean_terminated_length": 533.0134887695312,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 0.3825072886297376,
      "grad_norm": 0.2016945630311966,
      "kl": 0.0019893646240234375,
      "learning_rate": 1e-06,
      "loss": 0.0092,
      "num_tokens": 26144011.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.17854970693588257,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4863404929637909,
      "step": 164
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1564.0,
      "completions/mean_length": 575.2678833007812,
      "completions/mean_terminated_length": 543.549560546875,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 0.3848396501457726,
      "grad_norm": 0.18787410855293274,
      "kl": 0.002079010009765625,
      "learning_rate": 1e-06,
      "loss": 0.0311,
      "num_tokens": 26298183.0,
      "reward": 0.4419642984867096,
      "reward_std": 0.1489580273628235,
      "rewards/verify_math_reward/mean": 0.4419642984867096,
      "rewards/verify_math_reward/std": 0.4977326989173889,
      "step": 165
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1854.0,
      "completions/mean_length": 598.0223388671875,
      "completions/mean_terminated_length": 550.5385131835938,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 0.3871720116618076,
      "grad_norm": 0.18036533892154694,
      "kl": 0.0022106170654296875,
      "learning_rate": 1e-06,
      "loss": 0.0026,
      "num_tokens": 26453404.0,
      "reward": 0.5625,
      "reward_std": 0.1441422998905182,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 166
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 2771.0,
      "completions/max_terminated_length": 2771.0,
      "completions/mean_length": 606.2366333007812,
      "completions/mean_terminated_length": 606.2366333007812,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 0.3895043731778426,
      "grad_norm": 0.2669314444065094,
      "kl": 0.0019855499267578125,
      "learning_rate": 1e-06,
      "loss": 0.0219,
      "num_tokens": 26612577.0,
      "reward": 0.4776785969734192,
      "reward_std": 0.21478131413459778,
      "rewards/verify_math_reward/mean": 0.4776785671710968,
      "rewards/verify_math_reward/std": 0.5006202459335327,
      "step": 167
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2743.0,
      "completions/mean_length": 523.4598388671875,
      "completions/mean_terminated_length": 507.4394836425781,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 0.39183673469387753,
      "grad_norm": 0.25433677434921265,
      "kl": 0.002719879150390625,
      "learning_rate": 1e-06,
      "loss": 0.014,
      "num_tokens": 26751080.0,
      "reward": 0.6651785969734192,
      "reward_std": 0.19117942452430725,
      "rewards/verify_math_reward/mean": 0.6651785969734192,
      "rewards/verify_math_reward/std": 0.4729849696159363,
      "step": 168
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2245.0,
      "completions/mean_length": 666.044677734375,
      "completions/mean_terminated_length": 539.00927734375,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 0.39416909620991253,
      "grad_norm": 0.23573501408100128,
      "kl": 0.0018463134765625,
      "learning_rate": 1e-06,
      "loss": 0.064,
      "num_tokens": 26919794.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.20667067170143127,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 169
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1729.0,
      "completions/mean_length": 573.107177734375,
      "completions/mean_terminated_length": 541.369384765625,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 0.3965014577259475,
      "grad_norm": 0.21281416714191437,
      "kl": 0.002506256103515625,
      "learning_rate": 1e-06,
      "loss": 0.0269,
      "num_tokens": 27068994.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.17976489663124084,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.49460574984550476,
      "step": 170
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2479.0,
      "completions/mean_length": 611.1607666015625,
      "completions/mean_terminated_length": 563.855224609375,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 0.3988338192419825,
      "grad_norm": 0.2572023570537567,
      "kl": 0.002071380615234375,
      "learning_rate": 1e-06,
      "loss": 0.0386,
      "num_tokens": 27228854.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.2900620996952057,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49385640025138855,
      "step": 171
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1588.0,
      "completions/mean_length": 559.3125,
      "completions/mean_terminated_length": 527.450439453125,
      "completions/min_length": 214.0,
      "completions/min_terminated_length": 214.0,
      "epoch": 0.40116618075801747,
      "grad_norm": 0.25404220819473267,
      "kl": 0.002285003662109375,
      "learning_rate": 1e-06,
      "loss": 0.0438,
      "num_tokens": 27376972.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.2592580318450928,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49385640025138855,
      "step": 172
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3019.0,
      "completions/mean_length": 540.6741333007812,
      "completions/mean_terminated_length": 492.4117736816406,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.40349854227405246,
      "grad_norm": 0.24391190707683563,
      "kl": 0.0030002593994140625,
      "learning_rate": 1e-06,
      "loss": 0.0295,
      "num_tokens": 27521619.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.18623536825180054,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.48023054003715515,
      "step": 173
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1426.0,
      "completions/mean_length": 542.6964721679688,
      "completions/mean_terminated_length": 494.4615478515625,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 0.40583090379008746,
      "grad_norm": 0.28649452328681946,
      "kl": 0.00246429443359375,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 27665023.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.263334721326828,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 174
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3279.0,
      "completions/mean_length": 534.6964721679688,
      "completions/mean_terminated_length": 502.61260986328125,
      "completions/min_length": 4.0,
      "completions/min_terminated_length": 4.0,
      "epoch": 0.40816326530612246,
      "grad_norm": 0.2814836800098419,
      "kl": 0.00211334228515625,
      "learning_rate": 1e-06,
      "loss": 0.0311,
      "num_tokens": 27804099.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.20966331660747528,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4991183578968048,
      "step": 175
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1450.0,
      "completions/mean_length": 538.982177734375,
      "completions/mean_terminated_length": 506.93695068359375,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 0.41049562682215746,
      "grad_norm": 0.27930331230163574,
      "kl": 0.003086090087890625,
      "learning_rate": 1e-06,
      "loss": 0.0455,
      "num_tokens": 27946343.0,
      "reward": 0.629464328289032,
      "reward_std": 0.257912814617157,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4840298891067505,
      "step": 176
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3200.0,
      "completions/mean_length": 567.2142944335938,
      "completions/mean_terminated_length": 535.4234619140625,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 0.4128279883381924,
      "grad_norm": 0.22507323324680328,
      "kl": 0.002315521240234375,
      "learning_rate": 1e-06,
      "loss": 0.0272,
      "num_tokens": 28090431.0,
      "reward": 0.6339285969734192,
      "reward_std": 0.23205281794071198,
      "rewards/verify_math_reward/mean": 0.6339285969734192,
      "rewards/verify_math_reward/std": 0.4828082025051117,
      "step": 177
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2344.0,
      "completions/mean_length": 650.0267944335938,
      "completions/mean_terminated_length": 587.3726806640625,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 0.4151603498542274,
      "grad_norm": 0.20785152912139893,
      "kl": 0.002262115478515625,
      "learning_rate": 1e-06,
      "loss": 0.0516,
      "num_tokens": 28262613.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.163971409201622,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4884762465953827,
      "step": 178
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1666.0,
      "completions/mean_length": 614.375,
      "completions/mean_terminated_length": 534.8858032226562,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 0.4174927113702624,
      "grad_norm": 0.24259614944458008,
      "kl": 0.0018787384033203125,
      "learning_rate": 1e-06,
      "loss": 0.0173,
      "num_tokens": 28429033.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.23942892253398895,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 179
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1984.0,
      "completions/mean_length": 603.3035888671875,
      "completions/mean_terminated_length": 571.8378295898438,
      "completions/min_length": 128.0,
      "completions/min_terminated_length": 128.0,
      "epoch": 0.4198250728862974,
      "grad_norm": 0.2416960448026657,
      "kl": 0.002521514892578125,
      "learning_rate": 1e-06,
      "loss": 0.0146,
      "num_tokens": 28585157.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.22558964788913727,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 180
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1781.0,
      "completions/mean_length": 677.4420166015625,
      "completions/mean_terminated_length": 615.2863159179688,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 0.4221574344023324,
      "grad_norm": 0.20833344757556915,
      "kl": 0.002132415771484375,
      "learning_rate": 1e-06,
      "loss": 0.0185,
      "num_tokens": 28758456.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.1979493498802185,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 181
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1681.0,
      "completions/mean_length": 615.7678833007812,
      "completions/mean_terminated_length": 584.4144287109375,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 0.42448979591836733,
      "grad_norm": 0.21140404045581818,
      "kl": 0.00281524658203125,
      "learning_rate": 1e-06,
      "loss": 0.0143,
      "num_tokens": 28918340.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.17946544289588928,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 182
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1902.0,
      "completions/mean_length": 633.3795166015625,
      "completions/mean_terminated_length": 586.3756103515625,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 0.4268221574344023,
      "grad_norm": 0.25119203329086304,
      "kl": 0.002170562744140625,
      "learning_rate": 1e-06,
      "loss": 0.0742,
      "num_tokens": 29083001.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.2501044273376465,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.48743006587028503,
      "step": 183
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2133.0,
      "completions/mean_length": 603.71875,
      "completions/mean_terminated_length": 588.058349609375,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.4291545189504373,
      "grad_norm": 0.21964031457901,
      "kl": 0.0020694732666015625,
      "learning_rate": 1e-06,
      "loss": 0.0229,
      "num_tokens": 29240394.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.1956884115934372,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.49460574984550476,
      "step": 184
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2410.0,
      "completions/mean_length": 815.8527221679688,
      "completions/mean_terminated_length": 710.04150390625,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 0.4314868804664723,
      "grad_norm": 0.1969069242477417,
      "kl": 0.0018215179443359375,
      "learning_rate": 1e-06,
      "loss": 0.0323,
      "num_tokens": 29445745.0,
      "reward": 0.4687500298023224,
      "reward_std": 0.21057625114917755,
      "rewards/verify_math_reward/mean": 0.46875,
      "rewards/verify_math_reward/std": 0.5001401305198669,
      "step": 185
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3714.0,
      "completions/mean_length": 531.6607666015625,
      "completions/mean_terminated_length": 515.6771850585938,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 0.43381924198250726,
      "grad_norm": 0.3044220209121704,
      "kl": 0.00237274169921875,
      "learning_rate": 1e-06,
      "loss": 0.0116,
      "num_tokens": 29582717.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.24363845586776733,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5010399222373962,
      "step": 186
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3589.0,
      "completions/mean_length": 565.0491333007812,
      "completions/mean_terminated_length": 533.23876953125,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 0.43615160349854226,
      "grad_norm": 0.25785499811172485,
      "kl": 0.002742767333984375,
      "learning_rate": 1e-06,
      "loss": 0.0574,
      "num_tokens": 29727992.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.23521658778190613,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.4788738489151001,
      "step": 187
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1612.0,
      "completions/mean_length": 661.5535888671875,
      "completions/mean_terminated_length": 630.6126098632812,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 0.43848396501457726,
      "grad_norm": 0.2619020342826843,
      "kl": 0.002166748046875,
      "learning_rate": 1e-06,
      "loss": 0.0682,
      "num_tokens": 29901892.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.2770000994205475,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 188
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2728.0,
      "completions/mean_length": 694.2053833007812,
      "completions/mean_terminated_length": 616.5387573242188,
      "completions/min_length": 194.0,
      "completions/min_terminated_length": 194.0,
      "epoch": 0.44081632653061226,
      "grad_norm": 0.20431652665138245,
      "kl": 0.0028629302978515625,
      "learning_rate": 1e-06,
      "loss": -0.0025,
      "num_tokens": 30079418.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.17269553244113922,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5010399222373962,
      "step": 189
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2207.0,
      "completions/mean_length": 583.53125,
      "completions/mean_terminated_length": 551.8873901367188,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 0.44314868804664725,
      "grad_norm": 0.2757954001426697,
      "kl": 0.00231170654296875,
      "learning_rate": 1e-06,
      "loss": -0.0136,
      "num_tokens": 30236433.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.1892252266407013,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.4977326989173889,
      "step": 190
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1320.0,
      "completions/max_terminated_length": 1320.0,
      "completions/mean_length": 480.9285888671875,
      "completions/mean_terminated_length": 480.9285888671875,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 0.4454810495626822,
      "grad_norm": 0.23335270583629608,
      "kl": 0.00286102294921875,
      "learning_rate": 1e-06,
      "loss": 0.0041,
      "num_tokens": 30371673.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.16878993809223175,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.4904395341873169,
      "step": 191
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2766.0,
      "completions/mean_length": 605.794677734375,
      "completions/mean_terminated_length": 574.3513793945312,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 0.4478134110787172,
      "grad_norm": 0.25478288531303406,
      "kl": 0.002330780029296875,
      "learning_rate": 1e-06,
      "loss": 0.0411,
      "num_tokens": 30526179.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.22545400261878967,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 192
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2360.0,
      "completions/mean_length": 714.9107666015625,
      "completions/mean_terminated_length": 637.7168579101562,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 0.4501457725947522,
      "grad_norm": 0.20080216228961945,
      "kl": 0.001979827880859375,
      "learning_rate": 1e-06,
      "loss": 0.0239,
      "num_tokens": 30708671.0,
      "reward": 0.3705357313156128,
      "reward_std": 0.20801867544651031,
      "rewards/verify_math_reward/mean": 0.3705357015132904,
      "rewards/verify_math_reward/std": 0.4840298593044281,
      "step": 193
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1717.0,
      "completions/mean_length": 679.46875,
      "completions/mean_terminated_length": 633.0905151367188,
      "completions/min_length": 184.0,
      "completions/min_terminated_length": 184.0,
      "epoch": 0.4524781341107872,
      "grad_norm": 0.23196423053741455,
      "kl": 0.001995086669921875,
      "learning_rate": 1e-06,
      "loss": 0.0736,
      "num_tokens": 30885216.0,
      "reward": 0.486607164144516,
      "reward_std": 0.257912814617157,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 194
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1854.0,
      "completions/max_terminated_length": 1854.0,
      "completions/mean_length": 497.83929443359375,
      "completions/mean_terminated_length": 497.83929443359375,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 0.45481049562682213,
      "grad_norm": 0.2476566582918167,
      "kl": 0.0028533935546875,
      "learning_rate": 1e-06,
      "loss": 0.0104,
      "num_tokens": 31013244.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.1827620565891266,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.48743006587028503,
      "step": 195
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 3214.0,
      "completions/max_terminated_length": 3214.0,
      "completions/mean_length": 527.8303833007812,
      "completions/mean_terminated_length": 527.8303833007812,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 0.45714285714285713,
      "grad_norm": 0.2752782702445984,
      "kl": 0.002559661865234375,
      "learning_rate": 1e-06,
      "loss": 0.0075,
      "num_tokens": 31151286.0,
      "reward": 0.5,
      "reward_std": 0.21387286484241486,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5011197924613953,
      "step": 196
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1439.0,
      "completions/mean_length": 556.5402221679688,
      "completions/mean_terminated_length": 492.18634033203125,
      "completions/min_length": 16.0,
      "completions/min_terminated_length": 16.0,
      "epoch": 0.4594752186588921,
      "grad_norm": 0.2352851927280426,
      "kl": 0.00229644775390625,
      "learning_rate": 1e-06,
      "loss": 0.0283,
      "num_tokens": 31306103.0,
      "reward": 0.53125,
      "reward_std": 0.21117517352104187,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.5001401305198669,
      "step": 197
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1932.0,
      "completions/mean_length": 682.6473388671875,
      "completions/mean_terminated_length": 539.7628173828125,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.4618075801749271,
      "grad_norm": 0.24791188538074493,
      "kl": 0.0019893646240234375,
      "learning_rate": 1e-06,
      "loss": 0.0065,
      "num_tokens": 31483240.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.2086220532655716,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949929118156433,
      "step": 198
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3958.0,
      "completions/mean_length": 726.6116333007812,
      "completions/mean_terminated_length": 617.921630859375,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 0.4641399416909621,
      "grad_norm": 0.1901516169309616,
      "kl": 0.0020084381103515625,
      "learning_rate": 1e-06,
      "loss": 0.039,
      "num_tokens": 31671865.0,
      "reward": 0.4598214626312256,
      "reward_std": 0.19239181280136108,
      "rewards/verify_math_reward/mean": 0.4598214328289032,
      "rewards/verify_math_reward/std": 0.49949926137924194,
      "step": 199
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2103.0,
      "completions/mean_length": 683.0580444335938,
      "completions/mean_terminated_length": 540.190673828125,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 0.46647230320699706,
      "grad_norm": 0.26182863116264343,
      "kl": 0.002254486083984375,
      "learning_rate": 1e-06,
      "loss": 0.0497,
      "num_tokens": 31847166.0,
      "reward": 0.59375,
      "reward_std": 0.2502654790878296,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 200
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1982.0,
      "completions/mean_length": 587.3482666015625,
      "completions/mean_terminated_length": 555.73876953125,
      "completions/min_length": 68.0,
      "completions/min_terminated_length": 68.0,
      "epoch": 0.46880466472303206,
      "grad_norm": 0.28166404366493225,
      "kl": 0.00238037109375,
      "learning_rate": 1e-06,
      "loss": 0.0467,
      "num_tokens": 32003524.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.26121222972869873,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.4977326989173889,
      "step": 201
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2860.0,
      "completions/mean_length": 632.5714721679688,
      "completions/mean_terminated_length": 601.369384765625,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 0.47113702623906706,
      "grad_norm": 0.22461578249931335,
      "kl": 0.00252532958984375,
      "learning_rate": 1e-06,
      "loss": 0.0138,
      "num_tokens": 32163052.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.19178561866283417,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.49460574984550476,
      "step": 202
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2833.0,
      "completions/mean_length": 701.357177734375,
      "completions/mean_terminated_length": 575.629638671875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.47346938775510206,
      "grad_norm": 0.21722613275051117,
      "kl": 0.0023365020751953125,
      "learning_rate": 1e-06,
      "loss": -0.0027,
      "num_tokens": 32341532.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.18081346154212952,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.4930652976036072,
      "step": 203
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1556.0,
      "completions/mean_length": 630.3125,
      "completions/mean_terminated_length": 534.9265747070312,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 0.47580174927113705,
      "grad_norm": 0.24759508669376373,
      "kl": 0.002536773681640625,
      "learning_rate": 1e-06,
      "loss": 0.0162,
      "num_tokens": 32499930.0,
      "reward": 0.65625,
      "reward_std": 0.22350260615348816,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4760226309299469,
      "step": 204
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2909.0,
      "completions/mean_length": 650.1875,
      "completions/mean_terminated_length": 539.0322875976562,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 0.478134110787172,
      "grad_norm": 0.22896498441696167,
      "kl": 0.0021572113037109375,
      "learning_rate": 1e-06,
      "loss": 0.1047,
      "num_tokens": 32662868.0,
      "reward": 0.7500000596046448,
      "reward_std": 0.22289641201496124,
      "rewards/verify_math_reward/mean": 0.75,
      "rewards/verify_math_reward/std": 0.4339824914932251,
      "step": 205
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2468.0,
      "completions/mean_length": 553.357177734375,
      "completions/mean_terminated_length": 521.4414672851562,
      "completions/min_length": 105.0,
      "completions/min_terminated_length": 105.0,
      "epoch": 0.480466472303207,
      "grad_norm": 0.25822311639785767,
      "kl": 0.00254058837890625,
      "learning_rate": 1e-06,
      "loss": -0.0033,
      "num_tokens": 32805700.0,
      "reward": 0.5625,
      "reward_std": 0.21222089231014252,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 206
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2398.0,
      "completions/mean_length": 534.2857666015625,
      "completions/mean_terminated_length": 518.3139038085938,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 0.482798833819242,
      "grad_norm": 0.27964499592781067,
      "kl": 0.002460479736328125,
      "learning_rate": 1e-06,
      "loss": 0.0084,
      "num_tokens": 32949964.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.1318221390247345,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49385640025138855,
      "step": 207
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1947.0,
      "completions/mean_length": 671.84375,
      "completions/mean_terminated_length": 577.6008911132812,
      "completions/min_length": 185.0,
      "completions/min_terminated_length": 185.0,
      "epoch": 0.485131195335277,
      "grad_norm": 0.25014668703079224,
      "kl": 0.002231597900390625,
      "learning_rate": 1e-06,
      "loss": 0.0764,
      "num_tokens": 33119385.0,
      "reward": 0.629464328289032,
      "reward_std": 0.17720451951026917,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4840298891067505,
      "step": 208
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1950.0,
      "completions/mean_length": 569.732177734375,
      "completions/mean_terminated_length": 537.9639892578125,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 0.48746355685131193,
      "grad_norm": 0.23354767262935638,
      "kl": 0.0025787353515625,
      "learning_rate": 1e-06,
      "loss": 0.0271,
      "num_tokens": 33268325.0,
      "reward": 0.59375,
      "reward_std": 0.22558963298797607,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 209
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2308.0,
      "completions/mean_length": 630.3928833007812,
      "completions/mean_terminated_length": 567.3817749023438,
      "completions/min_length": 145.0,
      "completions/min_terminated_length": 145.0,
      "epoch": 0.4897959183673469,
      "grad_norm": 0.2099696546792984,
      "kl": 0.00211334228515625,
      "learning_rate": 1e-06,
      "loss": 0.0263,
      "num_tokens": 33431557.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.20996278524398804,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.5006202459335327,
      "step": 210
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2087.0,
      "completions/mean_length": 561.6027221679688,
      "completions/mean_terminated_length": 513.6244506835938,
      "completions/min_length": 127.0,
      "completions/min_terminated_length": 127.0,
      "epoch": 0.4921282798833819,
      "grad_norm": 0.2531992793083191,
      "kl": 0.0027008056640625,
      "learning_rate": 1e-06,
      "loss": 0.0132,
      "num_tokens": 33577524.0,
      "reward": 0.59375,
      "reward_std": 0.20410579442977905,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 211
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3648.0,
      "completions/mean_length": 606.0045166015625,
      "completions/mean_terminated_length": 542.5499877929688,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 0.4944606413994169,
      "grad_norm": 0.25219160318374634,
      "kl": 0.002231597900390625,
      "learning_rate": 1e-06,
      "loss": 0.0145,
      "num_tokens": 33735509.0,
      "reward": 0.535714328289032,
      "reward_std": 0.18067501485347748,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 212
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1646.0,
      "completions/mean_length": 456.4910888671875,
      "completions/mean_terminated_length": 440.1704406738281,
      "completions/min_length": 107.0,
      "completions/min_terminated_length": 107.0,
      "epoch": 0.4967930029154519,
      "grad_norm": 0.30093586444854736,
      "kl": 0.002838134765625,
      "learning_rate": 1e-06,
      "loss": 0.0323,
      "num_tokens": 33856723.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.25687432289123535,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49385640025138855,
      "step": 213
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1414.0,
      "completions/max_terminated_length": 1414.0,
      "completions/mean_length": 526.7678833007812,
      "completions/mean_terminated_length": 526.7678833007812,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 0.49912536443148686,
      "grad_norm": 0.26041379570961,
      "kl": 0.0023651123046875,
      "learning_rate": 1e-06,
      "loss": -0.0016,
      "num_tokens": 33997551.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.19209234416484833,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48947933316230774,
      "step": 214
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1449.0,
      "completions/max_terminated_length": 1449.0,
      "completions/mean_length": 480.7857360839844,
      "completions/mean_terminated_length": 480.7857360839844,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 0.5014577259475219,
      "grad_norm": 0.2899254858493805,
      "kl": 0.002529144287109375,
      "learning_rate": 1e-06,
      "loss": 0.0328,
      "num_tokens": 34133663.0,
      "reward": 0.566964328289032,
      "reward_std": 0.257912814617157,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49660524725914,
      "step": 215
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2382.0,
      "completions/mean_length": 589.6875,
      "completions/mean_terminated_length": 525.9363403320312,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 0.5037900874635568,
      "grad_norm": 0.2395540475845337,
      "kl": 0.002971649169921875,
      "learning_rate": 1e-06,
      "loss": -0.0118,
      "num_tokens": 34285385.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.18909241259098053,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949926137924194,
      "step": 216
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1685.0,
      "completions/mean_length": 560.075927734375,
      "completions/mean_terminated_length": 528.220703125,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 0.5061224489795918,
      "grad_norm": 0.2749766707420349,
      "kl": 0.00225067138671875,
      "learning_rate": 1e-06,
      "loss": 0.0425,
      "num_tokens": 34435378.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.19990073144435883,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.4904395341873169,
      "step": 217
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 3585.0,
      "completions/max_terminated_length": 3585.0,
      "completions/mean_length": 498.8214416503906,
      "completions/mean_terminated_length": 498.8214416503906,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 0.5084548104956268,
      "grad_norm": 0.24174532294273376,
      "kl": 0.00290679931640625,
      "learning_rate": 1e-06,
      "loss": 0.0132,
      "num_tokens": 34568954.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.19178561866283417,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.48023054003715515,
      "step": 218
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1775.0,
      "completions/max_terminated_length": 1775.0,
      "completions/mean_length": 482.2812805175781,
      "completions/mean_terminated_length": 482.2812805175781,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 0.5107871720116618,
      "grad_norm": 0.3048853576183319,
      "kl": 0.002696990966796875,
      "learning_rate": 1e-06,
      "loss": 0.0034,
      "num_tokens": 34701761.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.2368713617324829,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823516607284546,
      "step": 219
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3880.0,
      "completions/mean_length": 559.6741333007812,
      "completions/mean_terminated_length": 527.8153076171875,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 0.5131195335276968,
      "grad_norm": 0.27453503012657166,
      "kl": 0.002105712890625,
      "learning_rate": 1e-06,
      "loss": -0.0075,
      "num_tokens": 34850368.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.21313384175300598,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.4904395043849945,
      "step": 220
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1454.0,
      "completions/mean_length": 532.0089721679688,
      "completions/mean_terminated_length": 516.0269165039062,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 0.5154518950437318,
      "grad_norm": 0.2646479308605194,
      "kl": 0.002300262451171875,
      "learning_rate": 1e-06,
      "loss": 0.0266,
      "num_tokens": 34991802.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.18727383017539978,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 221
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2066.0,
      "completions/mean_length": 648.75,
      "completions/mean_terminated_length": 570.045654296875,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.5177842565597668,
      "grad_norm": 0.2515674829483032,
      "kl": 0.0022411346435546875,
      "learning_rate": 1e-06,
      "loss": 0.0386,
      "num_tokens": 35164114.0,
      "reward": 0.5625,
      "reward_std": 0.2248506098985672,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 222
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1737.0,
      "completions/mean_length": 638.2902221679688,
      "completions/mean_terminated_length": 575.4227294921875,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 0.5201166180758018,
      "grad_norm": 0.2733094394207001,
      "kl": 0.002407073974609375,
      "learning_rate": 1e-06,
      "loss": 0.0095,
      "num_tokens": 35327867.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.283466100692749,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5010398626327515,
      "step": 223
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3876.0,
      "completions/mean_length": 605.4241333007812,
      "completions/mean_terminated_length": 573.9774780273438,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 0.5224489795918368,
      "grad_norm": 0.25804582238197327,
      "kl": 0.002838134765625,
      "learning_rate": 1e-06,
      "loss": 0.0666,
      "num_tokens": 35484858.0,
      "reward": 0.5,
      "reward_std": 0.20997007191181183,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5011197924613953,
      "step": 224
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2558.0,
      "completions/mean_length": 652.5267944335938,
      "completions/mean_terminated_length": 605.7828369140625,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 0.5247813411078717,
      "grad_norm": 0.2812970280647278,
      "kl": 0.002105712890625,
      "learning_rate": 1e-06,
      "loss": 0.068,
      "num_tokens": 35654296.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.2380882054567337,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49597999453544617,
      "step": 225
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1575.0,
      "completions/mean_length": 481.5937805175781,
      "completions/mean_terminated_length": 465.38568115234375,
      "completions/min_length": 80.0,
      "completions/min_terminated_length": 80.0,
      "epoch": 0.5271137026239067,
      "grad_norm": 0.20803192257881165,
      "kl": 0.00357818603515625,
      "learning_rate": 1e-06,
      "loss": 0.0167,
      "num_tokens": 35782853.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.1554211974143982,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48947930335998535,
      "step": 226
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2798.0,
      "completions/mean_length": 588.8705444335938,
      "completions/mean_terminated_length": 525.1045532226562,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 0.5294460641399417,
      "grad_norm": 0.2741509675979614,
      "kl": 0.002384185791015625,
      "learning_rate": 1e-06,
      "loss": 0.0279,
      "num_tokens": 35934520.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.20245832204818726,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 227
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1569.0,
      "completions/max_terminated_length": 1569.0,
      "completions/mean_length": 508.29913330078125,
      "completions/mean_terminated_length": 508.29913330078125,
      "completions/min_length": 179.0,
      "completions/min_terminated_length": 179.0,
      "epoch": 0.5317784256559767,
      "grad_norm": 0.27911996841430664,
      "kl": 0.002399444580078125,
      "learning_rate": 1e-06,
      "loss": -0.0053,
      "num_tokens": 36071419.0,
      "reward": 0.598214328289032,
      "reward_std": 0.2072695791721344,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49135705828666687,
      "step": 228
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1842.0,
      "completions/mean_length": 582.4464721679688,
      "completions/mean_terminated_length": 550.7927856445312,
      "completions/min_length": 196.0,
      "completions/min_terminated_length": 196.0,
      "epoch": 0.5341107871720117,
      "grad_norm": 0.2247956246137619,
      "kl": 0.002819061279296875,
      "learning_rate": 1e-06,
      "loss": 0.0076,
      "num_tokens": 36222391.0,
      "reward": 0.5,
      "reward_std": 0.20576053857803345,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5011197924613953,
      "step": 229
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2208.0,
      "completions/mean_length": 564.6517944335938,
      "completions/mean_terminated_length": 548.816162109375,
      "completions/min_length": 148.0,
      "completions/min_terminated_length": 148.0,
      "epoch": 0.5364431486880467,
      "grad_norm": 0.27169230580329895,
      "kl": 0.0021495819091796875,
      "learning_rate": 1e-06,
      "loss": 0.0486,
      "num_tokens": 36369601.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.2604703903198242,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949929118156433,
      "step": 230
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3086.0,
      "completions/mean_length": 710.2366333007812,
      "completions/mean_terminated_length": 617.0504150390625,
      "completions/min_length": 191.0,
      "completions/min_terminated_length": 191.0,
      "epoch": 0.5387755102040817,
      "grad_norm": 0.26924261450767517,
      "kl": 0.00235748291015625,
      "learning_rate": 1e-06,
      "loss": 0.0774,
      "num_tokens": 36556686.0,
      "reward": 0.4464285969734192,
      "reward_std": 0.2368713617324829,
      "rewards/verify_math_reward/mean": 0.4464285671710968,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 231
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2172.0,
      "completions/mean_length": 518.2723388671875,
      "completions/mean_terminated_length": 469.7059020996094,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 0.5411078717201167,
      "grad_norm": 0.2511337697505951,
      "kl": 0.00334930419921875,
      "learning_rate": 1e-06,
      "loss": 0.0119,
      "num_tokens": 36693419.0,
      "reward": 0.7321428656578064,
      "reward_std": 0.17599493265151978,
      "rewards/verify_math_reward/mean": 0.7321428656578064,
      "rewards/verify_math_reward/std": 0.443834513425827,
      "step": 232
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1671.0,
      "completions/mean_length": 532.8214721679688,
      "completions/mean_terminated_length": 500.7207336425781,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 0.5434402332361516,
      "grad_norm": 0.264467716217041,
      "kl": 0.003246307373046875,
      "learning_rate": 1e-06,
      "loss": 0.0203,
      "num_tokens": 36835603.0,
      "reward": 0.5625,
      "reward_std": 0.19990354776382446,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 233
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3132.0,
      "completions/mean_length": 549.9955444335938,
      "completions/mean_terminated_length": 518.049560546875,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 0.5457725947521865,
      "grad_norm": 0.20606504380702972,
      "kl": 0.002223968505859375,
      "learning_rate": 1e-06,
      "loss": 0.0225,
      "num_tokens": 36986130.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.12640023231506348,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949926137924194,
      "step": 234
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2617.0,
      "completions/mean_length": 567.9642944335938,
      "completions/mean_terminated_length": 503.81817626953125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.5481049562682215,
      "grad_norm": 0.22268931567668915,
      "kl": 0.00246429443359375,
      "learning_rate": 1e-06,
      "loss": 0.0006,
      "num_tokens": 37134954.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.18727383017539978,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.4930652678012848,
      "step": 235
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2868.0,
      "completions/mean_length": 600.4375,
      "completions/mean_terminated_length": 552.9864501953125,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 0.5504373177842565,
      "grad_norm": 0.2422136813402176,
      "kl": 0.0020904541015625,
      "learning_rate": 1e-06,
      "loss": 0.0084,
      "num_tokens": 37291836.0,
      "reward": 0.65625,
      "reward_std": 0.19929736852645874,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4760226309299469,
      "step": 236
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1378.0,
      "completions/max_terminated_length": 1378.0,
      "completions/mean_length": 525.3973388671875,
      "completions/mean_terminated_length": 525.3973388671875,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 0.5527696793002915,
      "grad_norm": 0.273456335067749,
      "kl": 0.002994537353515625,
      "learning_rate": 1e-06,
      "loss": 0.01,
      "num_tokens": 37433365.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.237474724650383,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 237
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1830.0,
      "completions/mean_length": 543.9553833007812,
      "completions/mean_terminated_length": 495.7375793457031,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 0.5551020408163265,
      "grad_norm": 0.2322549819946289,
      "kl": 0.002719879150390625,
      "learning_rate": 1e-06,
      "loss": -0.0034,
      "num_tokens": 37579435.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.12535615265369415,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4863404929637909,
      "step": 238
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2258.0,
      "completions/mean_length": 577.625,
      "completions/mean_terminated_length": 545.9279174804688,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.5574344023323615,
      "grad_norm": 0.2547401487827301,
      "kl": 0.0025634765625,
      "learning_rate": 1e-06,
      "loss": -0.0182,
      "num_tokens": 37727943.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.2057577222585678,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.4904395341873169,
      "step": 239
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3021.0,
      "completions/mean_length": 561.0045166015625,
      "completions/mean_terminated_length": 529.1576538085938,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 0.5597667638483965,
      "grad_norm": 0.23458260297775269,
      "kl": 0.002529144287109375,
      "learning_rate": 1e-06,
      "loss": 0.0083,
      "num_tokens": 37871432.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.2183874398469925,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49385640025138855,
      "step": 240
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2791.0,
      "completions/mean_length": 724.4910888671875,
      "completions/mean_terminated_length": 647.5159301757812,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 0.5620991253644315,
      "grad_norm": 0.18068929016590118,
      "kl": 0.00234222412109375,
      "learning_rate": 1e-06,
      "loss": 0.0448,
      "num_tokens": 38057230.0,
      "reward": 0.4508928656578064,
      "reward_std": 0.19404374063014984,
      "rewards/verify_math_reward/mean": 0.4508928656578064,
      "rewards/verify_math_reward/std": 0.49869707226753235,
      "step": 241
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2172.0,
      "completions/mean_length": 589.4642944335938,
      "completions/mean_terminated_length": 541.8642578125,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 0.5644314868804665,
      "grad_norm": 0.26205113530158997,
      "kl": 0.002826690673828125,
      "learning_rate": 1e-06,
      "loss": -0.0134,
      "num_tokens": 38209038.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.20349960029125214,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 242
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3522.0,
      "completions/mean_length": 674.1830444335938,
      "completions/mean_terminated_length": 611.9681396484375,
      "completions/min_length": 195.0,
      "completions/min_terminated_length": 195.0,
      "epoch": 0.5667638483965015,
      "grad_norm": 0.249900683760643,
      "kl": 0.002323150634765625,
      "learning_rate": 1e-06,
      "loss": 0.0654,
      "num_tokens": 38382015.0,
      "reward": 0.4151785969734192,
      "reward_std": 0.29488059878349304,
      "rewards/verify_math_reward/mean": 0.4151785671710968,
      "rewards/verify_math_reward/std": 0.49385643005371094,
      "step": 243
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3973.0,
      "completions/mean_length": 582.9107666015625,
      "completions/mean_terminated_length": 551.2612915039062,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 0.5690962099125364,
      "grad_norm": 0.26367759704589844,
      "kl": 0.0029754638671875,
      "learning_rate": 1e-06,
      "loss": 0.0105,
      "num_tokens": 38531371.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.24980218708515167,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.48743006587028503,
      "step": 244
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3629.0,
      "completions/mean_length": 738.1785888671875,
      "completions/mean_terminated_length": 613.8148193359375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 0.5714285714285714,
      "grad_norm": 0.24815772473812103,
      "kl": 0.002346038818359375,
      "learning_rate": 1e-06,
      "loss": 0.105,
      "num_tokens": 38718659.0,
      "reward": 0.598214328289032,
      "reward_std": 0.2439451813697815,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49135705828666687,
      "step": 245
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2647.0,
      "completions/mean_length": 716.2142944335938,
      "completions/mean_terminated_length": 685.7658081054688,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 0.5737609329446064,
      "grad_norm": 0.23341864347457886,
      "kl": 0.002655029296875,
      "learning_rate": 1e-06,
      "loss": 0.028,
      "num_tokens": 38900827.0,
      "reward": 0.4062500298023224,
      "reward_std": 0.2738363444805145,
      "rewards/verify_math_reward/mean": 0.40625,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 246
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1578.0,
      "completions/mean_length": 543.9464721679688,
      "completions/mean_terminated_length": 528.0179443359375,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 0.5760932944606414,
      "grad_norm": 0.30561524629592896,
      "kl": 0.003040313720703125,
      "learning_rate": 1e-06,
      "loss": 0.0066,
      "num_tokens": 39042239.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.303001344203949,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.49460574984550476,
      "step": 247
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3062.0,
      "completions/mean_length": 673.8482666015625,
      "completions/mean_terminated_length": 643.0180053710938,
      "completions/min_length": 163.0,
      "completions/min_terminated_length": 163.0,
      "epoch": 0.5784256559766764,
      "grad_norm": 0.22902356088161469,
      "kl": 0.002460479736328125,
      "learning_rate": 1e-06,
      "loss": 0.0076,
      "num_tokens": 39213909.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.20770913362503052,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 248
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1651.0,
      "completions/mean_length": 721.8035888671875,
      "completions/mean_terminated_length": 612.95849609375,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 0.5807580174927114,
      "grad_norm": 0.216725155711174,
      "kl": 0.002513885498046875,
      "learning_rate": 1e-06,
      "loss": 0.0191,
      "num_tokens": 39393785.0,
      "reward": 0.4107142984867096,
      "reward_std": 0.21569423377513885,
      "rewards/verify_math_reward/mean": 0.4107142984867096,
      "rewards/verify_math_reward/std": 0.4930652976036072,
      "step": 249
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2559.0,
      "completions/mean_length": 612.9866333007812,
      "completions/mean_terminated_length": 581.6080932617188,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 0.5830903790087464,
      "grad_norm": 0.23955409228801727,
      "kl": 0.00269317626953125,
      "learning_rate": 1e-06,
      "loss": 0.0232,
      "num_tokens": 39548894.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.21808069944381714,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.4788738489151001,
      "step": 250
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3036.0,
      "completions/mean_length": 499.0714416503906,
      "completions/mean_terminated_length": 482.9417419433594,
      "completions/min_length": 201.0,
      "completions/min_terminated_length": 201.0,
      "epoch": 0.5854227405247814,
      "grad_norm": 0.27811646461486816,
      "kl": 0.00293731689453125,
      "learning_rate": 1e-06,
      "loss": 0.0375,
      "num_tokens": 39681006.0,
      "reward": 0.691964328289032,
      "reward_std": 0.2627313733100891,
      "rewards/verify_math_reward/mean": 0.6919642686843872,
      "rewards/verify_math_reward/std": 0.46271538734436035,
      "step": 251
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1885.0,
      "completions/mean_length": 554.1875,
      "completions/mean_terminated_length": 506.1086120605469,
      "completions/min_length": 124.0,
      "completions/min_terminated_length": 124.0,
      "epoch": 0.5877551020408164,
      "grad_norm": 0.22807757556438446,
      "kl": 0.002674102783203125,
      "learning_rate": 1e-06,
      "loss": 0.0552,
      "num_tokens": 39827488.0,
      "reward": 0.59375,
      "reward_std": 0.15842114388942719,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 252
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2387.0,
      "completions/mean_length": 615.3839721679688,
      "completions/mean_terminated_length": 584.0270385742188,
      "completions/min_length": 125.0,
      "completions/min_terminated_length": 125.0,
      "epoch": 0.5900874635568513,
      "grad_norm": 0.26391544938087463,
      "kl": 0.002716064453125,
      "learning_rate": 1e-06,
      "loss": 0.0092,
      "num_tokens": 39987126.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.22289641201496124,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.4930652976036072,
      "step": 253
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1946.0,
      "completions/mean_length": 622.8125,
      "completions/mean_terminated_length": 575.6651611328125,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.5924198250728863,
      "grad_norm": 0.2827799916267395,
      "kl": 0.00335693359375,
      "learning_rate": 1e-06,
      "loss": -0.005,
      "num_tokens": 40152724.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.21356894075870514,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 254
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 3665.0,
      "completions/max_terminated_length": 3665.0,
      "completions/mean_length": 537.0223388671875,
      "completions/mean_terminated_length": 537.0223388671875,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 0.5947521865889213,
      "grad_norm": 0.3180239200592041,
      "kl": 0.00290679931640625,
      "learning_rate": 1e-06,
      "loss": 0.0049,
      "num_tokens": 40297233.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.26243188977241516,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.48023054003715515,
      "step": 255
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2474.0,
      "completions/mean_length": 553.2366333007812,
      "completions/mean_terminated_length": 537.3497924804688,
      "completions/min_length": 121.0,
      "completions/min_terminated_length": 121.0,
      "epoch": 0.5970845481049563,
      "grad_norm": 0.28264546394348145,
      "kl": 0.002605438232421875,
      "learning_rate": 1e-06,
      "loss": 0.0243,
      "num_tokens": 40438830.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.2284567803144455,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47747132182121277,
      "step": 256
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2081.0,
      "completions/mean_length": 592.5357666015625,
      "completions/mean_terminated_length": 560.9729614257812,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 0.5994169096209913,
      "grad_norm": 0.27566614747047424,
      "kl": 0.00299072265625,
      "learning_rate": 1e-06,
      "loss": 0.0258,
      "num_tokens": 40599854.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.1963018774986267,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 257
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2774.0,
      "completions/mean_length": 548.1517944335938,
      "completions/mean_terminated_length": 532.2421875,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 0.6017492711370263,
      "grad_norm": 0.26068398356437683,
      "kl": 0.0031280517578125,
      "learning_rate": 1e-06,
      "loss": 0.0232,
      "num_tokens": 40743064.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.22935959696769714,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.4904395043849945,
      "step": 258
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2715.0,
      "completions/mean_length": 590.7142944335938,
      "completions/mean_terminated_length": 559.1351318359375,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 0.6040816326530613,
      "grad_norm": 0.2197912037372589,
      "kl": 0.00249481201171875,
      "learning_rate": 1e-06,
      "loss": 0.0221,
      "num_tokens": 40902896.0,
      "reward": 0.566964328289032,
      "reward_std": 0.13737240433692932,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49660524725914,
      "step": 259
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3635.0,
      "completions/mean_length": 699.9553833007812,
      "completions/mean_terminated_length": 669.3603515625,
      "completions/min_length": 197.0,
      "completions/min_terminated_length": 197.0,
      "epoch": 0.6064139941690962,
      "grad_norm": 0.18691788613796234,
      "kl": 0.0020694732666015625,
      "learning_rate": 1e-06,
      "loss": -0.0057,
      "num_tokens": 41081134.0,
      "reward": 0.4464285969734192,
      "reward_std": 0.13707295060157776,
      "rewards/verify_math_reward/mean": 0.4464285671710968,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 260
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1895.0,
      "completions/mean_length": 524.2366333007812,
      "completions/mean_terminated_length": 492.0585632324219,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.6087463556851312,
      "grad_norm": 0.2733798921108246,
      "kl": 0.002613067626953125,
      "learning_rate": 1e-06,
      "loss": 0.0191,
      "num_tokens": 41223843.0,
      "reward": 0.65625,
      "reward_std": 0.1833682507276535,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4760226309299469,
      "step": 261
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2262.0,
      "completions/mean_length": 517.8705444335938,
      "completions/mean_terminated_length": 485.6351318359375,
      "completions/min_length": 189.0,
      "completions/min_terminated_length": 189.0,
      "epoch": 0.6110787172011661,
      "grad_norm": 0.2720491886138916,
      "kl": 0.003376007080078125,
      "learning_rate": 1e-06,
      "loss": 0.0338,
      "num_tokens": 41362726.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.178862065076828,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 262
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1858.0,
      "completions/mean_length": 575.1473388671875,
      "completions/mean_terminated_length": 559.3587646484375,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 0.6134110787172011,
      "grad_norm": 0.20130468904972076,
      "kl": 0.00251007080078125,
      "learning_rate": 1e-06,
      "loss": 0.0124,
      "num_tokens": 41513887.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.1460937112569809,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4884762763977051,
      "step": 263
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1916.0,
      "completions/mean_length": 657.3080444335938,
      "completions/mean_terminated_length": 578.799072265625,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 0.6157434402332361,
      "grad_norm": 0.267302542924881,
      "kl": 0.00292205810546875,
      "learning_rate": 1e-06,
      "loss": 0.0199,
      "num_tokens": 41689172.0,
      "reward": 0.4642857313156128,
      "reward_std": 0.23192447423934937,
      "rewards/verify_math_reward/mean": 0.4642857015132904,
      "rewards/verify_math_reward/std": 0.49983978271484375,
      "step": 264
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.049107142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2409.0,
      "completions/mean_length": 787.3170166015625,
      "completions/mean_terminated_length": 616.446044921875,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 0.6180758017492711,
      "grad_norm": 0.17197687923908234,
      "kl": 0.0020236968994140625,
      "learning_rate": 1e-06,
      "loss": 0.0319,
      "num_tokens": 41890427.0,
      "reward": 0.4196428656578064,
      "reward_std": 0.16488432884216309,
      "rewards/verify_math_reward/mean": 0.4196428656578064,
      "rewards/verify_math_reward/std": 0.49460577964782715,
      "step": 265
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2180.0,
      "completions/mean_length": 657.7098388671875,
      "completions/mean_terminated_length": 626.7342529296875,
      "completions/min_length": 110.0,
      "completions/min_terminated_length": 110.0,
      "epoch": 0.6204081632653061,
      "grad_norm": 0.22111672163009644,
      "kl": 0.00244903564453125,
      "learning_rate": 1e-06,
      "loss": 0.0448,
      "num_tokens": 42057090.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.22558964788913727,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.4977326989173889,
      "step": 266
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2332.0,
      "completions/mean_length": 635.0402221679688,
      "completions/mean_terminated_length": 619.5202026367188,
      "completions/min_length": 234.0,
      "completions/min_terminated_length": 234.0,
      "epoch": 0.6227405247813411,
      "grad_norm": 0.23865652084350586,
      "kl": 0.00266265869140625,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 42220995.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.25010165572166443,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135848045349,
      "step": 267
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1561.0,
      "completions/mean_length": 631.4598388671875,
      "completions/mean_terminated_length": 600.2477416992188,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 0.6250728862973761,
      "grad_norm": 0.2775896489620209,
      "kl": 0.002925872802734375,
      "learning_rate": 1e-06,
      "loss": 0.0246,
      "num_tokens": 42392226.0,
      "reward": 0.53125,
      "reward_std": 0.2534010410308838,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.5001401305198669,
      "step": 268
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3737.0,
      "completions/mean_length": 638.1160888671875,
      "completions/mean_terminated_length": 575.2454223632812,
      "completions/min_length": 157.0,
      "completions/min_terminated_length": 157.0,
      "epoch": 0.6274052478134111,
      "grad_norm": 0.17287638783454895,
      "kl": 0.00275421142578125,
      "learning_rate": 1e-06,
      "loss": 0.0093,
      "num_tokens": 42555212.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.11047111451625824,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 269
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3778.0,
      "completions/mean_length": 484.3839416503906,
      "completions/mean_terminated_length": 468.1883544921875,
      "completions/min_length": 3.0,
      "completions/min_terminated_length": 3.0,
      "epoch": 0.6297376093294461,
      "grad_norm": 0.29258185625076294,
      "kl": 0.003673553466796875,
      "learning_rate": 1e-06,
      "loss": 0.0174,
      "num_tokens": 42683554.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.2137400209903717,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47747132182121277,
      "step": 270
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1713.0,
      "completions/mean_length": 626.1473388671875,
      "completions/mean_terminated_length": 579.0452880859375,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 0.632069970845481,
      "grad_norm": 0.22278255224227905,
      "kl": 0.00243377685546875,
      "learning_rate": 1e-06,
      "loss": 0.0514,
      "num_tokens": 42847131.0,
      "reward": 0.5625,
      "reward_std": 0.18471625447273254,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 271
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2156.0,
      "completions/mean_length": 619.0625,
      "completions/mean_terminated_length": 539.6803588867188,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 0.634402332361516,
      "grad_norm": 0.3052447736263275,
      "kl": 0.00273895263671875,
      "learning_rate": 1e-06,
      "loss": 0.051,
      "num_tokens": 43006217.0,
      "reward": 0.4419642984867096,
      "reward_std": 0.24650557339191437,
      "rewards/verify_math_reward/mean": 0.4419642984867096,
      "rewards/verify_math_reward/std": 0.4977326989173889,
      "step": 272
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1452.0,
      "completions/mean_length": 533.75,
      "completions/mean_terminated_length": 485.3936767578125,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 0.636734693877551,
      "grad_norm": 0.28764647245407104,
      "kl": 0.00301361083984375,
      "learning_rate": 1e-06,
      "loss": 0.0796,
      "num_tokens": 43146537.0,
      "reward": 0.7142857313156128,
      "reward_std": 0.26859113574028015,
      "rewards/verify_math_reward/mean": 0.7142857313156128,
      "rewards/verify_math_reward/std": 0.45276570320129395,
      "step": 273
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3619.0,
      "completions/mean_length": 720.232177734375,
      "completions/mean_terminated_length": 643.1597900390625,
      "completions/min_length": 131.0,
      "completions/min_terminated_length": 131.0,
      "epoch": 0.639067055393586,
      "grad_norm": 0.24549873173236847,
      "kl": 0.002490997314453125,
      "learning_rate": 1e-06,
      "loss": 0.0215,
      "num_tokens": 43333597.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.2582167685031891,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 274
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1952.0,
      "completions/mean_length": 677.21875,
      "completions/mean_terminated_length": 646.4189453125,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 0.641399416909621,
      "grad_norm": 0.2859312891960144,
      "kl": 0.002758026123046875,
      "learning_rate": 1e-06,
      "loss": 0.004,
      "num_tokens": 43511214.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.267547070980072,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 275
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1666.0,
      "completions/mean_length": 650.169677734375,
      "completions/mean_terminated_length": 571.4976806640625,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 0.643731778425656,
      "grad_norm": 0.2649756968021393,
      "kl": 0.00237274169921875,
      "learning_rate": 1e-06,
      "loss": 0.0103,
      "num_tokens": 43674812.0,
      "reward": 0.504464328289032,
      "reward_std": 0.26828160881996155,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5010998845100403,
      "step": 276
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3250.0,
      "completions/mean_length": 633.6116333007812,
      "completions/mean_terminated_length": 570.6590576171875,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 0.646064139941691,
      "grad_norm": 0.24528780579566956,
      "kl": 0.00266265869140625,
      "learning_rate": 1e-06,
      "loss": -0.0002,
      "num_tokens": 43842685.0,
      "reward": 0.535714328289032,
      "reward_std": 0.20710574090480804,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 277
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3493.0,
      "completions/mean_length": 696.1116333007812,
      "completions/mean_terminated_length": 586.4378051757812,
      "completions/min_length": 115.0,
      "completions/min_terminated_length": 115.0,
      "epoch": 0.648396501457726,
      "grad_norm": 0.2225281298160553,
      "kl": 0.00244903564453125,
      "learning_rate": 1e-06,
      "loss": 0.125,
      "num_tokens": 44019342.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.24498368799686432,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4991183280944824,
      "step": 278
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1882.0,
      "completions/mean_length": 534.71875,
      "completions/mean_terminated_length": 518.7489013671875,
      "completions/min_length": 178.0,
      "completions/min_terminated_length": 178.0,
      "epoch": 0.650728862973761,
      "grad_norm": 0.29402709007263184,
      "kl": 0.002429962158203125,
      "learning_rate": 1e-06,
      "loss": 0.0037,
      "num_tokens": 44159327.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.25851622223854065,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.4815419018268585,
      "step": 279
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3168.0,
      "completions/mean_length": 758.3527221679688,
      "completions/mean_terminated_length": 697.6681518554688,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 0.6530612244897959,
      "grad_norm": 0.23811939358711243,
      "kl": 0.00225067138671875,
      "learning_rate": 1e-06,
      "loss": 0.0262,
      "num_tokens": 44352358.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.26181843876838684,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 280
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4039.0,
      "completions/mean_length": 693.1964721679688,
      "completions/mean_terminated_length": 662.54052734375,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 0.6553935860058309,
      "grad_norm": 0.21368886530399323,
      "kl": 0.0027618408203125,
      "learning_rate": 1e-06,
      "loss": 0.016,
      "num_tokens": 44530322.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.1847190409898758,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 281
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2653.0,
      "completions/mean_length": 541.0267944335938,
      "completions/mean_terminated_length": 509.0,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 0.6577259475218659,
      "grad_norm": 0.2308286875486374,
      "kl": 0.00319671630859375,
      "learning_rate": 1e-06,
      "loss": 0.027,
      "num_tokens": 44674448.0,
      "reward": 0.535714328289032,
      "reward_std": 0.18727384507656097,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 282
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1707.0,
      "completions/mean_length": 573.03125,
      "completions/mean_terminated_length": 525.2081909179688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 0.6600583090379009,
      "grad_norm": 0.30643728375434875,
      "kl": 0.002544403076171875,
      "learning_rate": 1e-06,
      "loss": 0.0402,
      "num_tokens": 44833479.0,
      "reward": 0.535714328289032,
      "reward_std": 0.24017076194286346,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 283
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1989.0,
      "completions/mean_length": 569.8170166015625,
      "completions/mean_terminated_length": 538.049560546875,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 0.6623906705539359,
      "grad_norm": 0.2745210528373718,
      "kl": 0.002323150634765625,
      "learning_rate": 1e-06,
      "loss": 0.0264,
      "num_tokens": 44984406.0,
      "reward": 0.59375,
      "reward_std": 0.21282710134983063,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 284
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3362.0,
      "completions/mean_length": 718.7188110351562,
      "completions/mean_terminated_length": 641.61181640625,
      "completions/min_length": 215.0,
      "completions/min_terminated_length": 215.0,
      "epoch": 0.6647230320699709,
      "grad_norm": 0.24607455730438232,
      "kl": 0.002429962158203125,
      "learning_rate": 1e-06,
      "loss": -0.0092,
      "num_tokens": 45171231.0,
      "reward": 0.4285714626312256,
      "reward_std": 0.2607799470424652,
      "rewards/verify_math_reward/mean": 0.4285714328289032,
      "rewards/verify_math_reward/std": 0.49597999453544617,
      "step": 285
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2756.0,
      "completions/mean_length": 590.4598388671875,
      "completions/mean_terminated_length": 510.42462158203125,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 0.6670553935860059,
      "grad_norm": 0.2717443108558655,
      "kl": 0.002529144287109375,
      "learning_rate": 1e-06,
      "loss": 0.0469,
      "num_tokens": 45327966.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.23431093990802765,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.5010998249053955,
      "step": 286
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1611.0,
      "completions/mean_length": 705.1517944335938,
      "completions/mean_terminated_length": 611.82568359375,
      "completions/min_length": 87.0,
      "completions/min_terminated_length": 87.0,
      "epoch": 0.6693877551020408,
      "grad_norm": 0.237776979804039,
      "kl": 0.002727508544921875,
      "learning_rate": 1e-06,
      "loss": 0.0322,
      "num_tokens": 45510584.0,
      "reward": 0.5,
      "reward_std": 0.2724939286708832,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5011197924613953,
      "step": 287
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1630.0,
      "completions/mean_length": 553.53125,
      "completions/mean_terminated_length": 505.4434509277344,
      "completions/min_length": 94.0,
      "completions/min_terminated_length": 94.0,
      "epoch": 0.6717201166180758,
      "grad_norm": 0.309678316116333,
      "kl": 0.00322723388671875,
      "learning_rate": 1e-06,
      "loss": 0.0128,
      "num_tokens": 45657047.0,
      "reward": 0.625,
      "reward_std": 0.24754685163497925,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.4852071702480316,
      "step": 288
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3109.0,
      "completions/mean_length": 723.4241333007812,
      "completions/mean_terminated_length": 646.4246215820312,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 0.6740524781341107,
      "grad_norm": 0.23280808329582214,
      "kl": 0.002765655517578125,
      "learning_rate": 1e-06,
      "loss": -0.0022,
      "num_tokens": 45838278.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.23626233637332916,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 289
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3816.0,
      "completions/mean_length": 599.28125,
      "completions/mean_terminated_length": 583.6009521484375,
      "completions/min_length": 156.0,
      "completions/min_terminated_length": 156.0,
      "epoch": 0.6763848396501457,
      "grad_norm": 0.17826339602470398,
      "kl": 0.002777099609375,
      "learning_rate": 1e-06,
      "loss": 0.0078,
      "num_tokens": 45998789.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.1303030252456665,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49773266911506653,
      "step": 290
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 2155.0,
      "completions/max_terminated_length": 2155.0,
      "completions/mean_length": 504.169677734375,
      "completions/mean_terminated_length": 504.169677734375,
      "completions/min_length": 58.0,
      "completions/min_terminated_length": 58.0,
      "epoch": 0.6787172011661807,
      "grad_norm": 0.24889233708381653,
      "kl": 0.003597259521484375,
      "learning_rate": 1e-06,
      "loss": -0.0147,
      "num_tokens": 46141163.0,
      "reward": 0.535714328289032,
      "reward_std": 0.14835184812545776,
      "rewards/verify_math_reward/mean": 0.5357142686843872,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 291
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3117.0,
      "completions/mean_length": 659.8928833007812,
      "completions/mean_terminated_length": 597.4181518554688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 0.6810495626822157,
      "grad_norm": 0.19757574796676636,
      "kl": 0.00254058837890625,
      "learning_rate": 1e-06,
      "loss": -0.0062,
      "num_tokens": 46310859.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.15165123343467712,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 292
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3519.0,
      "completions/mean_length": 636.3973388671875,
      "completions/mean_terminated_length": 573.4954223632812,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 0.6833819241982507,
      "grad_norm": 0.3277430832386017,
      "kl": 0.00293731689453125,
      "learning_rate": 1e-06,
      "loss": 0.0299,
      "num_tokens": 46474092.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.2669408917427063,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135848045349,
      "step": 293
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0401785714285714,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3226.0,
      "completions/mean_length": 819.4285888671875,
      "completions/mean_terminated_length": 682.269775390625,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 0.6857142857142857,
      "grad_norm": 0.19413602352142334,
      "kl": 0.0022869110107421875,
      "learning_rate": 1e-06,
      "loss": 0.0102,
      "num_tokens": 46681548.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.22033601999282837,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49869707226753235,
      "step": 294
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2538.0,
      "completions/mean_length": 756.1250610351562,
      "completions/mean_terminated_length": 632.4259033203125,
      "completions/min_length": 187.0,
      "completions/min_terminated_length": 187.0,
      "epoch": 0.6880466472303207,
      "grad_norm": 0.31327852606773376,
      "kl": 0.00290679931640625,
      "learning_rate": 1e-06,
      "loss": 0.035,
      "num_tokens": 46868584.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.16457758843898773,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 295
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1557.0,
      "completions/mean_length": 593.2455444335938,
      "completions/mean_terminated_length": 545.6968383789062,
      "completions/min_length": 198.0,
      "completions/min_terminated_length": 198.0,
      "epoch": 0.6903790087463557,
      "grad_norm": 0.1804625689983368,
      "kl": 0.00264739990234375,
      "learning_rate": 1e-06,
      "loss": 0.0073,
      "num_tokens": 47028927.0,
      "reward": 0.598214328289032,
      "reward_std": 0.15268969535827637,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49135705828666687,
      "step": 296
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3367.0,
      "completions/mean_length": 665.0982666015625,
      "completions/mean_terminated_length": 602.7181396484375,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 0.6927113702623907,
      "grad_norm": 0.3225614130496979,
      "kl": 0.00260162353515625,
      "learning_rate": 1e-06,
      "loss": 0.0963,
      "num_tokens": 47197613.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.25821956992149353,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49773266911506653,
      "step": 297
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2163.0,
      "completions/mean_length": 577.0089721679688,
      "completions/mean_terminated_length": 545.3063354492188,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.6950437317784257,
      "grad_norm": 0.2598956227302551,
      "kl": 0.00286865234375,
      "learning_rate": 1e-06,
      "loss": 0.0299,
      "num_tokens": 47347927.0,
      "reward": 0.65625,
      "reward_std": 0.2197326421737671,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4760226309299469,
      "step": 298
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2775.0,
      "completions/mean_length": 647.575927734375,
      "completions/mean_terminated_length": 600.7647094726562,
      "completions/min_length": 190.0,
      "completions/min_terminated_length": 190.0,
      "epoch": 0.6973760932944606,
      "grad_norm": 0.2414235770702362,
      "kl": 0.002384185791015625,
      "learning_rate": 1e-06,
      "loss": 0.0113,
      "num_tokens": 47517312.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.19794653356075287,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 299
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3675.0,
      "completions/mean_length": 754.2098388671875,
      "completions/mean_terminated_length": 677.9132080078125,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 0.6997084548104956,
      "grad_norm": 0.21052268147468567,
      "kl": 0.002254486083984375,
      "learning_rate": 1e-06,
      "loss": -0.004,
      "num_tokens": 47706223.0,
      "reward": 0.424107164144516,
      "reward_std": 0.19886226952075958,
      "rewards/verify_math_reward/mean": 0.4241071343421936,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 300
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3031.0,
      "completions/mean_length": 823.9241333007812,
      "completions/mean_terminated_length": 702.7361450195312,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 0.7020408163265306,
      "grad_norm": 0.25020062923431396,
      "kl": 0.002361297607421875,
      "learning_rate": 1e-06,
      "loss": 0.0415,
      "num_tokens": 47911286.0,
      "reward": 0.5,
      "reward_std": 0.2582167387008667,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5011197924613953,
      "step": 301
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1376.0,
      "completions/mean_length": 661.7098388671875,
      "completions/mean_terminated_length": 599.2681884765625,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 0.7043731778425656,
      "grad_norm": 0.26508408784866333,
      "kl": 0.00241851806640625,
      "learning_rate": 1e-06,
      "loss": 0.0085,
      "num_tokens": 48084709.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.27956050634384155,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49773266911506653,
      "step": 302
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1530.0,
      "completions/max_terminated_length": 1530.0,
      "completions/mean_length": 568.4420166015625,
      "completions/mean_terminated_length": 568.4420166015625,
      "completions/min_length": 189.0,
      "completions/min_terminated_length": 189.0,
      "epoch": 0.7067055393586006,
      "grad_norm": 0.29956838488578796,
      "kl": 0.002964019775390625,
      "learning_rate": 1e-06,
      "loss": 0.0324,
      "num_tokens": 48236312.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.2842051088809967,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49385643005371094,
      "step": 303
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2112.0,
      "completions/mean_length": 661.4285888671875,
      "completions/mean_terminated_length": 550.6359252929688,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 0.7090379008746356,
      "grad_norm": 0.26300880312919617,
      "kl": 0.002956390380859375,
      "learning_rate": 1e-06,
      "loss": -0.0026,
      "num_tokens": 48407736.0,
      "reward": 0.486607164144516,
      "reward_std": 0.24168705940246582,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 304
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2046.0,
      "completions/mean_length": 613.2053833007812,
      "completions/mean_terminated_length": 565.9276123046875,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.7113702623906706,
      "grad_norm": 0.30458030104637146,
      "kl": 0.002872467041015625,
      "learning_rate": 1e-06,
      "loss": 0.0394,
      "num_tokens": 48567294.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.24949544668197632,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47747132182121277,
      "step": 305
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2758.0,
      "completions/mean_length": 733.950927734375,
      "completions/mean_terminated_length": 625.4976806640625,
      "completions/min_length": 24.0,
      "completions/min_terminated_length": 24.0,
      "epoch": 0.7137026239067056,
      "grad_norm": 0.2270134836435318,
      "kl": 0.002635955810546875,
      "learning_rate": 1e-06,
      "loss": 0.0355,
      "num_tokens": 48755403.0,
      "reward": 0.4642857313156128,
      "reward_std": 0.24077412486076355,
      "rewards/verify_math_reward/mean": 0.4642857015132904,
      "rewards/verify_math_reward/std": 0.49983981251716614,
      "step": 306
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3995.0,
      "completions/mean_length": 679.65625,
      "completions/mean_terminated_length": 633.2805786132812,
      "completions/min_length": 154.0,
      "completions/min_terminated_length": 154.0,
      "epoch": 0.7160349854227406,
      "grad_norm": 0.277005672454834,
      "kl": 0.0029144287109375,
      "learning_rate": 1e-06,
      "loss": 0.015,
      "num_tokens": 48933398.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.2250189185142517,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 307
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1484.0,
      "completions/mean_length": 521.34375,
      "completions/mean_terminated_length": 489.1396484375,
      "completions/min_length": 106.0,
      "completions/min_terminated_length": 106.0,
      "epoch": 0.7183673469387755,
      "grad_norm": 0.28764063119888306,
      "kl": 0.0028839111328125,
      "learning_rate": 1e-06,
      "loss": 0.0252,
      "num_tokens": 49070147.0,
      "reward": 0.660714328289032,
      "reward_std": 0.24680502712726593,
      "rewards/verify_math_reward/mean": 0.6607142686843872,
      "rewards/verify_math_reward/std": 0.4745272994041443,
      "step": 308
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1439.0,
      "completions/mean_length": 656.3527221679688,
      "completions/mean_terminated_length": 528.9583129882812,
      "completions/min_length": 155.0,
      "completions/min_terminated_length": 155.0,
      "epoch": 0.7206997084548105,
      "grad_norm": 0.27909383177757263,
      "kl": 0.0028228759765625,
      "learning_rate": 1e-06,
      "loss": 0.0326,
      "num_tokens": 49238482.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.20636393129825592,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48947930335998535,
      "step": 309
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1957.0,
      "completions/mean_length": 632.8616333007812,
      "completions/mean_terminated_length": 601.6621704101562,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 0.7230320699708455,
      "grad_norm": 0.22965008020401,
      "kl": 0.00463104248046875,
      "learning_rate": 1e-06,
      "loss": 0.0173,
      "num_tokens": 49401211.0,
      "reward": 0.5892857313156128,
      "reward_std": 0.1590273380279541,
      "rewards/verify_math_reward/mean": 0.5892857313156128,
      "rewards/verify_math_reward/std": 0.4930652976036072,
      "step": 310
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2120.0,
      "completions/mean_length": 709.6830444335938,
      "completions/mean_terminated_length": 648.1135864257812,
      "completions/min_length": 215.0,
      "completions/min_terminated_length": 215.0,
      "epoch": 0.7253644314868805,
      "grad_norm": 0.23763540387153625,
      "kl": 0.002582550048828125,
      "learning_rate": 1e-06,
      "loss": 0.0599,
      "num_tokens": 49584460.0,
      "reward": 0.5089285969734192,
      "reward_std": 0.26363420486450195,
      "rewards/verify_math_reward/mean": 0.5089285969734192,
      "rewards/verify_math_reward/std": 0.5010399222373962,
      "step": 311
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3078.0,
      "completions/mean_length": 564.5714721679688,
      "completions/mean_terminated_length": 532.7567749023438,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 0.7276967930029155,
      "grad_norm": 0.2912660837173462,
      "kl": 0.00273895263671875,
      "learning_rate": 1e-06,
      "loss": 0.0439,
      "num_tokens": 49730276.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.2119242548942566,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.4713950753211975,
      "step": 312
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1249.0,
      "completions/mean_length": 503.732177734375,
      "completions/mean_terminated_length": 487.62335205078125,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 0.7300291545189505,
      "grad_norm": 0.32089799642562866,
      "kl": 0.0036163330078125,
      "learning_rate": 1e-06,
      "loss": 0.046,
      "num_tokens": 49867160.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.2790871262550354,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.48023056983947754,
      "step": 313
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1756.0,
      "completions/mean_length": 626.1295166015625,
      "completions/mean_terminated_length": 594.869384765625,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 0.7323615160349854,
      "grad_norm": 0.27264782786369324,
      "kl": 0.002834320068359375,
      "learning_rate": 1e-06,
      "loss": 0.0178,
      "num_tokens": 50027325.0,
      "reward": 0.566964328289032,
      "reward_std": 0.2857242226600647,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.4966052174568176,
      "step": 314
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1992.0,
      "completions/mean_length": 570.0848388671875,
      "completions/mean_terminated_length": 554.2735595703125,
      "completions/min_length": 193.0,
      "completions/min_terminated_length": 193.0,
      "epoch": 0.7346938775510204,
      "grad_norm": 0.2196282595396042,
      "kl": 0.00276947021484375,
      "learning_rate": 1e-06,
      "loss": 0.0116,
      "num_tokens": 50177688.0,
      "reward": 0.65625,
      "reward_std": 0.18202301859855652,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4760226309299469,
      "step": 315
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3677.0,
      "completions/mean_length": 708.263427734375,
      "completions/mean_terminated_length": 598.9815673828125,
      "completions/min_length": 171.0,
      "completions/min_terminated_length": 171.0,
      "epoch": 0.7370262390670554,
      "grad_norm": 0.23442630469799042,
      "kl": 0.0024261474609375,
      "learning_rate": 1e-06,
      "loss": 0.0027,
      "num_tokens": 50358803.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.19929735362529755,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949929118156433,
      "step": 316
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3688.0,
      "completions/mean_length": 636.9330444335938,
      "completions/mean_terminated_length": 574.0408935546875,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 0.7393586005830903,
      "grad_norm": 0.29054102301597595,
      "kl": 0.00304412841796875,
      "learning_rate": 1e-06,
      "loss": 0.0479,
      "num_tokens": 50523668.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.3070724606513977,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 317
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3165.0,
      "completions/mean_length": 657.9642944335938,
      "completions/mean_terminated_length": 579.4702758789062,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 0.7416909620991253,
      "grad_norm": 0.23028461635112762,
      "kl": 0.002498626708984375,
      "learning_rate": 1e-06,
      "loss": 0.0329,
      "num_tokens": 50692412.0,
      "reward": 0.6428571939468384,
      "reward_std": 0.27743518352508545,
      "rewards/verify_math_reward/mean": 0.6428571343421936,
      "rewards/verify_math_reward/std": 0.48023054003715515,
      "step": 318
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3843.0,
      "completions/mean_length": 597.375,
      "completions/mean_terminated_length": 581.6861572265625,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 0.7440233236151603,
      "grad_norm": 0.22642745077610016,
      "kl": 0.00305938720703125,
      "learning_rate": 1e-06,
      "loss": 0.0023,
      "num_tokens": 50855136.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.17720730602741241,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 319
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1671.0,
      "completions/mean_length": 528.3214721679688,
      "completions/mean_terminated_length": 496.1802062988281,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 0.7463556851311953,
      "grad_norm": 0.30777016282081604,
      "kl": 0.004055023193359375,
      "learning_rate": 1e-06,
      "loss": 0.0423,
      "num_tokens": 50991064.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.2718849182128906,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.4713950753211975,
      "step": 320
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2417.0,
      "completions/mean_length": 614.4642944335938,
      "completions/mean_terminated_length": 598.85205078125,
      "completions/min_length": 70.0,
      "completions/min_terminated_length": 70.0,
      "epoch": 0.7486880466472303,
      "grad_norm": 0.2332543432712555,
      "kl": 0.003437042236328125,
      "learning_rate": 1e-06,
      "loss": -0.0139,
      "num_tokens": 51152456.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.220338836312294,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 321
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3255.0,
      "completions/mean_length": 670.6964721679688,
      "completions/mean_terminated_length": 608.4181518554688,
      "completions/min_length": 111.0,
      "completions/min_terminated_length": 111.0,
      "epoch": 0.7510204081632653,
      "grad_norm": 0.24177144467830658,
      "kl": 0.00246429443359375,
      "learning_rate": 1e-06,
      "loss": 0.0426,
      "num_tokens": 51326092.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.2458893060684204,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47747132182121277,
      "step": 322
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3210.0,
      "completions/mean_length": 790.2813110351562,
      "completions/mean_terminated_length": 760.5,
      "completions/min_length": 227.0,
      "completions/min_terminated_length": 227.0,
      "epoch": 0.7533527696793003,
      "grad_norm": 0.18753404915332794,
      "kl": 0.002582550048828125,
      "learning_rate": 1e-06,
      "loss": 0.0022,
      "num_tokens": 51530483.0,
      "reward": 0.4062500298023224,
      "reward_std": 0.22094503045082092,
      "rewards/verify_math_reward/mean": 0.40625,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 323
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1779.0,
      "completions/mean_length": 565.5848388671875,
      "completions/mean_terminated_length": 501.39544677734375,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 0.7556851311953353,
      "grad_norm": 0.21227282285690308,
      "kl": 0.00350189208984375,
      "learning_rate": 1e-06,
      "loss": 0.0346,
      "num_tokens": 51678470.0,
      "reward": 0.625,
      "reward_std": 0.12596234679222107,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.4852071702480316,
      "step": 324
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2320.0,
      "completions/mean_length": 611.0982666015625,
      "completions/mean_terminated_length": 563.7918701171875,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 0.7580174927113703,
      "grad_norm": 0.20451150834560394,
      "kl": 0.002536773681640625,
      "learning_rate": 1e-06,
      "loss": 0.0086,
      "num_tokens": 51841772.0,
      "reward": 0.6116071939468384,
      "reward_std": 0.1237042024731636,
      "rewards/verify_math_reward/mean": 0.6116071343421936,
      "rewards/verify_math_reward/std": 0.4884762763977051,
      "step": 325
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2162.0,
      "completions/mean_length": 499.1562805175781,
      "completions/mean_terminated_length": 450.330322265625,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.7603498542274052,
      "grad_norm": 0.3030953109264374,
      "kl": 0.002872467041015625,
      "learning_rate": 1e-06,
      "loss": 0.0221,
      "num_tokens": 51969783.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.24003510177135468,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.4663354456424713,
      "step": 326
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.044642857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3058.0,
      "completions/mean_length": 841.8482666015625,
      "completions/mean_terminated_length": 689.7850341796875,
      "completions/min_length": 199.0,
      "completions/min_terminated_length": 199.0,
      "epoch": 0.7626822157434402,
      "grad_norm": 0.1860758662223816,
      "kl": 0.00211334228515625,
      "learning_rate": 1e-06,
      "loss": 0.0311,
      "num_tokens": 52182957.0,
      "reward": 0.4464285969734192,
      "reward_std": 0.23235955834388733,
      "rewards/verify_math_reward/mean": 0.4464285671710968,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 327
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0758928571428571,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2630.0,
      "completions/mean_length": 844.6607666015625,
      "completions/mean_terminated_length": 577.6425170898438,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 0.7650145772594752,
      "grad_norm": 0.24089130759239197,
      "kl": 0.002498626708984375,
      "learning_rate": 1e-06,
      "loss": 0.0652,
      "num_tokens": 52399113.0,
      "reward": 0.5625,
      "reward_std": 0.20320294797420502,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 328
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1675.0,
      "completions/mean_length": 601.3705444335938,
      "completions/mean_terminated_length": 585.6995849609375,
      "completions/min_length": 174.0,
      "completions/min_terminated_length": 174.0,
      "epoch": 0.7673469387755102,
      "grad_norm": 0.2542576491832733,
      "kl": 0.002681732177734375,
      "learning_rate": 1e-06,
      "loss": 0.0195,
      "num_tokens": 52560980.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.22771494090557098,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.48743006587028503,
      "step": 329
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3245.0,
      "completions/mean_length": 604.0357666015625,
      "completions/mean_terminated_length": 556.6334838867188,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 0.7696793002915452,
      "grad_norm": 0.2152654528617859,
      "kl": 0.0027008056640625,
      "learning_rate": 1e-06,
      "loss": -0.0111,
      "num_tokens": 52714860.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.1979493498802185,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4645504951477051,
      "step": 330
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2202.0,
      "completions/mean_length": 520.0357666015625,
      "completions/mean_terminated_length": 504.0000305175781,
      "completions/min_length": 120.0,
      "completions/min_terminated_length": 120.0,
      "epoch": 0.7720116618075802,
      "grad_norm": 0.15844669938087463,
      "kl": 0.003021240234375,
      "learning_rate": 1e-06,
      "loss": 0.0097,
      "num_tokens": 52849452.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.1124253123998642,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.4663354754447937,
      "step": 331
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2081.0,
      "completions/mean_length": 697.1160888671875,
      "completions/mean_terminated_length": 635.3181762695312,
      "completions/min_length": 140.0,
      "completions/min_terminated_length": 140.0,
      "epoch": 0.7743440233236152,
      "grad_norm": 0.230719193816185,
      "kl": 0.002788543701171875,
      "learning_rate": 1e-06,
      "loss": 0.0148,
      "num_tokens": 53026982.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.23235955834388733,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 332
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2053.0,
      "completions/mean_length": 540.232177734375,
      "completions/mean_terminated_length": 508.1982116699219,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.7766763848396502,
      "grad_norm": 0.31468960642814636,
      "kl": 0.003276824951171875,
      "learning_rate": 1e-06,
      "loss": 0.0393,
      "num_tokens": 53170906.0,
      "reward": 0.5,
      "reward_std": 0.2818186283111572,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5011197924613953,
      "step": 333
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2453.0,
      "completions/mean_length": 800.7857666015625,
      "completions/mean_terminated_length": 678.74072265625,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.7790087463556852,
      "grad_norm": 0.19736206531524658,
      "kl": 0.0021724700927734375,
      "learning_rate": 1e-06,
      "loss": 0.0094,
      "num_tokens": 53369754.0,
      "reward": 0.4508928656578064,
      "reward_std": 0.17525030672550201,
      "rewards/verify_math_reward/mean": 0.4508928656578064,
      "rewards/verify_math_reward/std": 0.49869707226753235,
      "step": 334
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2565.0,
      "completions/mean_length": 694.9420166015625,
      "completions/mean_terminated_length": 585.2304077148438,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 0.7813411078717201,
      "grad_norm": 0.24868535995483398,
      "kl": 0.002613067626953125,
      "learning_rate": 1e-06,
      "loss": 0.0726,
      "num_tokens": 53552533.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.287075012922287,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.5006202459335327,
      "step": 335
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3976.0,
      "completions/mean_length": 700.9688110351562,
      "completions/mean_terminated_length": 639.2409057617188,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 0.7836734693877551,
      "grad_norm": 0.1996821165084839,
      "kl": 0.002552032470703125,
      "learning_rate": 1e-06,
      "loss": 0.0374,
      "num_tokens": 53735670.0,
      "reward": 0.4151785969734192,
      "reward_std": 0.19043760001659393,
      "rewards/verify_math_reward/mean": 0.4151785671710968,
      "rewards/verify_math_reward/std": 0.49385643005371094,
      "step": 336
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4073.0,
      "completions/mean_length": 610.6428833007812,
      "completions/mean_terminated_length": 579.2432861328125,
      "completions/min_length": 167.0,
      "completions/min_terminated_length": 167.0,
      "epoch": 0.7860058309037901,
      "grad_norm": 0.28694623708724976,
      "kl": 0.003108978271484375,
      "learning_rate": 1e-06,
      "loss": 0.0193,
      "num_tokens": 53892774.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.24424465000629425,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.46975722908973694,
      "step": 337
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1700.0,
      "completions/mean_length": 700.9553833007812,
      "completions/mean_terminated_length": 607.5137329101562,
      "completions/min_length": 189.0,
      "completions/min_terminated_length": 189.0,
      "epoch": 0.7883381924198251,
      "grad_norm": 0.17995084822177887,
      "kl": 0.00258636474609375,
      "learning_rate": 1e-06,
      "loss": 0.0106,
      "num_tokens": 54071436.0,
      "reward": 0.4776785969734192,
      "reward_std": 0.09919221699237823,
      "rewards/verify_math_reward/mean": 0.4776785671710968,
      "rewards/verify_math_reward/std": 0.5006202459335327,
      "step": 338
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1759.0,
      "completions/mean_length": 565.1473388671875,
      "completions/mean_terminated_length": 533.3378295898438,
      "completions/min_length": 190.0,
      "completions/min_terminated_length": 190.0,
      "epoch": 0.79067055393586,
      "grad_norm": 0.2467787116765976,
      "kl": 0.00278472900390625,
      "learning_rate": 1e-06,
      "loss": 0.0555,
      "num_tokens": 54225077.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.22228743135929108,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 339
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2036.0,
      "completions/mean_length": 630.3035888671875,
      "completions/mean_terminated_length": 551.1780395507812,
      "completions/min_length": 70.0,
      "completions/min_terminated_length": 70.0,
      "epoch": 0.793002915451895,
      "grad_norm": 0.2259238064289093,
      "kl": 0.003124237060546875,
      "learning_rate": 1e-06,
      "loss": 0.0273,
      "num_tokens": 54386977.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.20306451618671417,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 340
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1990.0,
      "completions/max_terminated_length": 1990.0,
      "completions/mean_length": 512.0982666015625,
      "completions/mean_terminated_length": 512.0982666015625,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 0.79533527696793,
      "grad_norm": 0.27471014857292175,
      "kl": 0.002613067626953125,
      "learning_rate": 1e-06,
      "loss": 0.0011,
      "num_tokens": 54518279.0,
      "reward": 0.6875000596046448,
      "reward_std": 0.17300227284431458,
      "rewards/verify_math_reward/mean": 0.6875,
      "rewards/verify_math_reward/std": 0.4645504951477051,
      "step": 341
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 2337.0,
      "completions/max_terminated_length": 2337.0,
      "completions/mean_length": 499.3482360839844,
      "completions/mean_terminated_length": 499.3482360839844,
      "completions/min_length": 159.0,
      "completions/min_terminated_length": 159.0,
      "epoch": 0.797667638483965,
      "grad_norm": 0.2643197178840637,
      "kl": 0.00286102294921875,
      "learning_rate": 1e-06,
      "loss": 0.0033,
      "num_tokens": 54655029.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.23191718757152557,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49869707226753235,
      "step": 342
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2478.0,
      "completions/mean_length": 708.0357666015625,
      "completions/mean_terminated_length": 646.4363403320312,
      "completions/min_length": 96.0,
      "completions/min_terminated_length": 96.0,
      "epoch": 0.8,
      "grad_norm": 0.18370066583156586,
      "kl": 0.00308990478515625,
      "learning_rate": 1e-06,
      "loss": 0.0048,
      "num_tokens": 54831797.0,
      "reward": 0.6696428656578064,
      "reward_std": 0.20320293307304382,
      "rewards/verify_math_reward/mean": 0.6696428656578064,
      "rewards/verify_math_reward/std": 0.4713950753211975,
      "step": 343
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2742.0,
      "completions/mean_length": 681.419677734375,
      "completions/mean_terminated_length": 666.107666015625,
      "completions/min_length": 200.0,
      "completions/min_terminated_length": 200.0,
      "epoch": 0.8023323615160349,
      "grad_norm": 0.1940321922302246,
      "kl": 0.0029296875,
      "learning_rate": 1e-06,
      "loss": -0.0031,
      "num_tokens": 55011643.0,
      "reward": 0.486607164144516,
      "reward_std": 0.1460937112569809,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 344
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1981.0,
      "completions/max_terminated_length": 1981.0,
      "completions/mean_length": 555.7545166015625,
      "completions/mean_terminated_length": 555.7545166015625,
      "completions/min_length": 114.0,
      "completions/min_terminated_length": 114.0,
      "epoch": 0.8046647230320699,
      "grad_norm": 0.23973211646080017,
      "kl": 0.00304412841796875,
      "learning_rate": 1e-06,
      "loss": -0.0035,
      "num_tokens": 55158476.0,
      "reward": 0.625,
      "reward_std": 0.18397442996501923,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.4852071702480316,
      "step": 345
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1498.0,
      "completions/max_terminated_length": 1498.0,
      "completions/mean_length": 552.4642944335938,
      "completions/mean_terminated_length": 552.4642944335938,
      "completions/min_length": 164.0,
      "completions/min_terminated_length": 164.0,
      "epoch": 0.8069970845481049,
      "grad_norm": 0.2807849049568176,
      "kl": 0.00318145751953125,
      "learning_rate": 1e-06,
      "loss": 0.0363,
      "num_tokens": 55302796.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.2693273723125458,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.49460574984550476,
      "step": 346
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1415.0,
      "completions/mean_length": 465.2500305175781,
      "completions/mean_terminated_length": 448.9686279296875,
      "completions/min_length": 97.0,
      "completions/min_terminated_length": 97.0,
      "epoch": 0.8093294460641399,
      "grad_norm": 0.255930095911026,
      "kl": 0.0030059814453125,
      "learning_rate": 1e-06,
      "loss": -0.0053,
      "num_tokens": 55422732.0,
      "reward": 0.6517857313156128,
      "reward_std": 0.19057324528694153,
      "rewards/verify_math_reward/mean": 0.6517857313156128,
      "rewards/verify_math_reward/std": 0.47747132182121277,
      "step": 347
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3885.0,
      "completions/mean_length": 597.4464721679688,
      "completions/mean_terminated_length": 533.8363647460938,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 0.8116618075801749,
      "grad_norm": 0.24181298911571503,
      "kl": 0.0027923583984375,
      "learning_rate": 1e-06,
      "loss": 0.0428,
      "num_tokens": 55578592.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.1842811554670334,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5010399222373962,
      "step": 348
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2376.0,
      "completions/mean_length": 526.0670166015625,
      "completions/mean_terminated_length": 477.6063537597656,
      "completions/min_length": 184.0,
      "completions/min_terminated_length": 184.0,
      "epoch": 0.8139941690962099,
      "grad_norm": 0.2987842261791229,
      "kl": 0.00356292724609375,
      "learning_rate": 1e-06,
      "loss": 0.0222,
      "num_tokens": 55714367.0,
      "reward": 0.6473214626312256,
      "reward_std": 0.25535523891448975,
      "rewards/verify_math_reward/mean": 0.6473214030265808,
      "rewards/verify_math_reward/std": 0.4788738191127777,
      "step": 349
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3106.0,
      "completions/mean_length": 552.8705444335938,
      "completions/mean_terminated_length": 520.950439453125,
      "completions/min_length": 165.0,
      "completions/min_terminated_length": 165.0,
      "epoch": 0.8163265306122449,
      "grad_norm": 0.238222137093544,
      "kl": 0.003093719482421875,
      "learning_rate": 1e-06,
      "loss": 0.0383,
      "num_tokens": 55858914.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.20966331660747528,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.49460577964782715,
      "step": 350
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1920.0,
      "completions/mean_length": 685.4152221679688,
      "completions/mean_terminated_length": 607.5479125976562,
      "completions/min_length": 78.0,
      "completions/min_terminated_length": 78.0,
      "epoch": 0.8186588921282799,
      "grad_norm": 0.2661639153957367,
      "kl": 0.0029754638671875,
      "learning_rate": 1e-06,
      "loss": 0.035,
      "num_tokens": 56033735.0,
      "reward": 0.5,
      "reward_std": 0.24229323863983154,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5011197924613953,
      "step": 351
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1815.0,
      "completions/mean_length": 544.5089721679688,
      "completions/mean_terminated_length": 479.93634033203125,
      "completions/min_length": 84.0,
      "completions/min_terminated_length": 84.0,
      "epoch": 0.8209912536443149,
      "grad_norm": 0.2590837776660919,
      "kl": 0.0030975341796875,
      "learning_rate": 1e-06,
      "loss": 0.0209,
      "num_tokens": 56175849.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.19538895785808563,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.4548610746860504,
      "step": 352
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3576.0,
      "completions/mean_length": 653.4866333007812,
      "completions/mean_terminated_length": 590.8954467773438,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 0.8233236151603499,
      "grad_norm": 0.1900016963481903,
      "kl": 0.002742767333984375,
      "learning_rate": 1e-06,
      "loss": 0.0306,
      "num_tokens": 56349542.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.16006861627101898,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949926137924194,
      "step": 353
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2598.0,
      "completions/mean_length": 756.8035888671875,
      "completions/mean_terminated_length": 664.8990478515625,
      "completions/min_length": 172.0,
      "completions/min_terminated_length": 172.0,
      "epoch": 0.8256559766763848,
      "grad_norm": 0.2148529291152954,
      "kl": 0.0026702880859375,
      "learning_rate": 1e-06,
      "loss": 0.0581,
      "num_tokens": 56545306.0,
      "reward": 0.4776785969734192,
      "reward_std": 0.19508221745491028,
      "rewards/verify_math_reward/mean": 0.4776785671710968,
      "rewards/verify_math_reward/std": 0.5006202459335327,
      "step": 354
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1936.0,
      "completions/mean_length": 669.0670166015625,
      "completions/mean_terminated_length": 622.5475463867188,
      "completions/min_length": 183.0,
      "completions/min_terminated_length": 183.0,
      "epoch": 0.8279883381924198,
      "grad_norm": 0.2178030163049698,
      "kl": 0.003086090087890625,
      "learning_rate": 1e-06,
      "loss": 0.0271,
      "num_tokens": 56719521.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.20996278524398804,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.5006201863288879,
      "step": 355
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1532.0,
      "completions/max_terminated_length": 1532.0,
      "completions/mean_length": 479.5357360839844,
      "completions/mean_terminated_length": 479.5357360839844,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 0.8303206997084548,
      "grad_norm": 0.24629198014736176,
      "kl": 0.003513336181640625,
      "learning_rate": 1e-06,
      "loss": -0.0007,
      "num_tokens": 56850889.0,
      "reward": 0.629464328289032,
      "reward_std": 0.17464692890644073,
      "rewards/verify_math_reward/mean": 0.6294642686843872,
      "rewards/verify_math_reward/std": 0.4840298891067505,
      "step": 356
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1809.0,
      "completions/mean_length": 509.5089416503906,
      "completions/mean_terminated_length": 477.1982116699219,
      "completions/min_length": 188.0,
      "completions/min_terminated_length": 188.0,
      "epoch": 0.8326530612244898,
      "grad_norm": 0.22888772189617157,
      "kl": 0.002506256103515625,
      "learning_rate": 1e-06,
      "loss": 0.0567,
      "num_tokens": 56983659.0,
      "reward": 0.7366071939468384,
      "reward_std": 0.1847190409898758,
      "rewards/verify_math_reward/mean": 0.7366071343421936,
      "rewards/verify_math_reward/std": 0.44146019220352173,
      "step": 357
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1700.0,
      "completions/mean_length": 608.5223388671875,
      "completions/mean_terminated_length": 577.1036376953125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 0.8349854227405248,
      "grad_norm": 0.24409568309783936,
      "kl": 0.002948760986328125,
      "learning_rate": 1e-06,
      "loss": 0.0112,
      "num_tokens": 57145352.0,
      "reward": 0.4375000298023224,
      "reward_std": 0.2051515281200409,
      "rewards/verify_math_reward/mean": 0.4375,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 358
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2183.0,
      "completions/mean_length": 702.482177734375,
      "completions/mean_terminated_length": 656.4163208007812,
      "completions/min_length": 139.0,
      "completions/min_terminated_length": 139.0,
      "epoch": 0.8373177842565598,
      "grad_norm": 0.222054123878479,
      "kl": 0.0023956298828125,
      "learning_rate": 1e-06,
      "loss": 0.0358,
      "num_tokens": 57324172.0,
      "reward": 0.4910714626312256,
      "reward_std": 0.2526620328426361,
      "rewards/verify_math_reward/mean": 0.4910714328289032,
      "rewards/verify_math_reward/std": 0.5010399222373962,
      "step": 359
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1370.0,
      "completions/mean_length": 594.40625,
      "completions/mean_terminated_length": 562.8603515625,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 0.8396501457725948,
      "grad_norm": 0.2607190012931824,
      "kl": 0.002899169921875,
      "learning_rate": 1e-06,
      "loss": 0.0155,
      "num_tokens": 57479383.0,
      "reward": 0.6383928656578064,
      "reward_std": 0.1931336224079132,
      "rewards/verify_math_reward/mean": 0.6383928656578064,
      "rewards/verify_math_reward/std": 0.48154187202453613,
      "step": 360
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2589.0,
      "completions/mean_length": 615.5402221679688,
      "completions/mean_terminated_length": 536.0775756835938,
      "completions/min_length": 161.0,
      "completions/min_terminated_length": 161.0,
      "epoch": 0.8419825072886298,
      "grad_norm": 0.26173636317253113,
      "kl": 0.002773284912109375,
      "learning_rate": 1e-06,
      "loss": 0.0428,
      "num_tokens": 57640688.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.22471500933170319,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49869707226753235,
      "step": 361
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2059.0,
      "completions/mean_length": 764.3214721679688,
      "completions/mean_terminated_length": 656.847900390625,
      "completions/min_length": 232.0,
      "completions/min_terminated_length": 232.0,
      "epoch": 0.8443148688046648,
      "grad_norm": 0.240379199385643,
      "kl": 0.0029144287109375,
      "learning_rate": 1e-06,
      "loss": 0.0338,
      "num_tokens": 57834704.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.22558683156967163,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 362
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2574.0,
      "completions/mean_length": 563.4866333007812,
      "completions/mean_terminated_length": 547.645751953125,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 0.8466472303206997,
      "grad_norm": 0.24983163177967072,
      "kl": 0.003002166748046875,
      "learning_rate": 1e-06,
      "loss": 0.0069,
      "num_tokens": 57982949.0,
      "reward": 0.6160714626312256,
      "reward_std": 0.15872061252593994,
      "rewards/verify_math_reward/mean": 0.6160714030265808,
      "rewards/verify_math_reward/std": 0.4874300956726074,
      "step": 363
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1842.0,
      "completions/mean_length": 533.2767944335938,
      "completions/mean_terminated_length": 501.1802062988281,
      "completions/min_length": 176.0,
      "completions/min_terminated_length": 176.0,
      "epoch": 0.8489795918367347,
      "grad_norm": 0.25354379415512085,
      "kl": 0.003021240234375,
      "learning_rate": 1e-06,
      "loss": 0.0277,
      "num_tokens": 58122107.0,
      "reward": 0.7276785969734192,
      "reward_std": 0.1690966635942459,
      "rewards/verify_math_reward/mean": 0.7276785969734192,
      "rewards/verify_math_reward/std": 0.4461514353752136,
      "step": 364
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1712.0,
      "completions/mean_length": 599.28125,
      "completions/mean_terminated_length": 567.779296875,
      "completions/min_length": 195.0,
      "completions/min_terminated_length": 195.0,
      "epoch": 0.8513119533527697,
      "grad_norm": 0.23023010790348053,
      "kl": 0.002780914306640625,
      "learning_rate": 1e-06,
      "loss": 0.0328,
      "num_tokens": 58276594.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.20320014655590057,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.4663354754447937,
      "step": 365
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1782.0,
      "completions/mean_length": 638.3170166015625,
      "completions/mean_terminated_length": 575.4500122070312,
      "completions/min_length": 101.0,
      "completions/min_terminated_length": 101.0,
      "epoch": 0.8536443148688047,
      "grad_norm": 0.23316563665866852,
      "kl": 0.003299713134765625,
      "learning_rate": 1e-06,
      "loss": -0.0065,
      "num_tokens": 58441009.0,
      "reward": 0.598214328289032,
      "reward_std": 0.18727383017539978,
      "rewards/verify_math_reward/mean": 0.5982142686843872,
      "rewards/verify_math_reward/std": 0.49135705828666687,
      "step": 366
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4090.0,
      "completions/mean_length": 627.5,
      "completions/mean_terminated_length": 564.4363403320312,
      "completions/min_length": 141.0,
      "completions/min_terminated_length": 141.0,
      "epoch": 0.8559766763848397,
      "grad_norm": 0.25327152013778687,
      "kl": 0.002445220947265625,
      "learning_rate": 1e-06,
      "loss": -0.0198,
      "num_tokens": 58603377.0,
      "reward": 0.7098214626312256,
      "reward_std": 0.14023670554161072,
      "rewards/verify_math_reward/mean": 0.7098214030265808,
      "rewards/verify_math_reward/std": 0.4548610746860504,
      "step": 367
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1967.0,
      "completions/mean_length": 703.9553833007812,
      "completions/mean_terminated_length": 642.2817993164062,
      "completions/min_length": 184.0,
      "completions/min_terminated_length": 184.0,
      "epoch": 0.8583090379008746,
      "grad_norm": 0.2600734233856201,
      "kl": 0.002674102783203125,
      "learning_rate": 1e-06,
      "loss": 0.001,
      "num_tokens": 58784655.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.28737616539001465,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.5010998249053955,
      "step": 368
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1941.0,
      "completions/mean_length": 624.1116333007812,
      "completions/mean_terminated_length": 544.8447265625,
      "completions/min_length": 95.0,
      "completions/min_terminated_length": 95.0,
      "epoch": 0.8606413994169096,
      "grad_norm": 0.2903830409049988,
      "kl": 0.003284454345703125,
      "learning_rate": 1e-06,
      "loss": 0.0398,
      "num_tokens": 58949656.0,
      "reward": 0.5,
      "reward_std": 0.25730663537979126,
      "rewards/verify_math_reward/mean": 0.5,
      "rewards/verify_math_reward/std": 0.5011197924613953,
      "step": 369
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3677.0,
      "completions/mean_length": 633.2277221679688,
      "completions/mean_terminated_length": 586.2217407226562,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.8629737609329446,
      "grad_norm": 0.2585247755050659,
      "kl": 0.00289154052734375,
      "learning_rate": 1e-06,
      "loss": 0.0294,
      "num_tokens": 59116291.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.29488059878349304,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49869707226753235,
      "step": 370
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1987.0,
      "completions/mean_length": 554.4330444335938,
      "completions/mean_terminated_length": 538.5515747070312,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 0.8653061224489796,
      "grad_norm": 0.2985505759716034,
      "kl": 0.002960205078125,
      "learning_rate": 1e-06,
      "loss": 0.0832,
      "num_tokens": 59265532.0,
      "reward": 0.7142857313156128,
      "reward_std": 0.2944483458995819,
      "rewards/verify_math_reward/mean": 0.7142857313156128,
      "rewards/verify_math_reward/std": 0.45276570320129395,
      "step": 371
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1631.0,
      "completions/mean_length": 554.7902221679688,
      "completions/mean_terminated_length": 538.9103393554688,
      "completions/min_length": 129.0,
      "completions/min_terminated_length": 129.0,
      "epoch": 0.8676384839650145,
      "grad_norm": 0.2962406873703003,
      "kl": 0.002971649169921875,
      "learning_rate": 1e-06,
      "loss": 0.0279,
      "num_tokens": 59412301.0,
      "reward": 0.59375,
      "reward_std": 0.2864660322666168,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 372
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3065.0,
      "completions/mean_length": 693.4420166015625,
      "completions/mean_terminated_length": 631.5772705078125,
      "completions/min_length": 207.0,
      "completions/min_terminated_length": 207.0,
      "epoch": 0.8699708454810495,
      "grad_norm": 0.20983396470546722,
      "kl": 0.00283050537109375,
      "learning_rate": 1e-06,
      "loss": -0.0164,
      "num_tokens": 59586864.0,
      "reward": 0.5401785969734192,
      "reward_std": 0.1976454108953476,
      "rewards/verify_math_reward/mean": 0.5401785969734192,
      "rewards/verify_math_reward/std": 0.49949926137924194,
      "step": 373
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2759.0,
      "completions/mean_length": 654.9152221679688,
      "completions/mean_terminated_length": 623.9144287109375,
      "completions/min_length": 153.0,
      "completions/min_terminated_length": 153.0,
      "epoch": 0.8723032069970845,
      "grad_norm": 0.19895663857460022,
      "kl": 0.0031280517578125,
      "learning_rate": 1e-06,
      "loss": 0.0163,
      "num_tokens": 59751733.0,
      "reward": 0.504464328289032,
      "reward_std": 0.1609787493944168,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5010998249053955,
      "step": 374
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2510.0,
      "completions/mean_length": 584.3660888671875,
      "completions/mean_terminated_length": 536.6968383789062,
      "completions/min_length": 158.0,
      "completions/min_terminated_length": 158.0,
      "epoch": 0.8746355685131195,
      "grad_norm": 0.26324746012687683,
      "kl": 0.00290679931640625,
      "learning_rate": 1e-06,
      "loss": 0.0336,
      "num_tokens": 59901871.0,
      "reward": 0.6964285969734192,
      "reward_std": 0.20997007191181183,
      "rewards/verify_math_reward/mean": 0.6964285969734192,
      "rewards/verify_math_reward/std": 0.46082955598831177,
      "step": 375
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1503.0,
      "completions/max_terminated_length": 1503.0,
      "completions/mean_length": 535.482177734375,
      "completions/mean_terminated_length": 535.482177734375,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 0.8769679300291545,
      "grad_norm": 0.2551153004169464,
      "kl": 0.003528594970703125,
      "learning_rate": 1e-06,
      "loss": 0.0186,
      "num_tokens": 60041931.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.17659834027290344,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48947930335998535,
      "step": 376
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3321.0,
      "completions/mean_length": 632.5535888671875,
      "completions/mean_terminated_length": 617.0224609375,
      "completions/min_length": 138.0,
      "completions/min_terminated_length": 138.0,
      "epoch": 0.8793002915451895,
      "grad_norm": 0.26995542645454407,
      "kl": 0.00276947021484375,
      "learning_rate": 1e-06,
      "loss": 0.0288,
      "num_tokens": 60206087.0,
      "reward": 0.59375,
      "reward_std": 0.26121222972869873,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 377
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2294.0,
      "completions/mean_length": 668.0848388671875,
      "completions/mean_terminated_length": 573.738525390625,
      "completions/min_length": 143.0,
      "completions/min_terminated_length": 143.0,
      "epoch": 0.8816326530612245,
      "grad_norm": 0.2051822394132614,
      "kl": 0.00415802001953125,
      "learning_rate": 1e-06,
      "loss": 0.0126,
      "num_tokens": 60374274.0,
      "reward": 0.53125,
      "reward_std": 0.19330193102359772,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.5001401305198669,
      "step": 378
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3709.0,
      "completions/mean_length": 734.4642944335938,
      "completions/mean_terminated_length": 657.7168579101562,
      "completions/min_length": 151.0,
      "completions/min_terminated_length": 151.0,
      "epoch": 0.8839650145772595,
      "grad_norm": 0.2789381444454193,
      "kl": 0.002765655517578125,
      "learning_rate": 1e-06,
      "loss": 0.0213,
      "num_tokens": 60561146.0,
      "reward": 0.486607164144516,
      "reward_std": 0.2714526653289795,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 379
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2681.0,
      "completions/mean_length": 639.8705444335938,
      "completions/mean_terminated_length": 592.9547729492188,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 0.8862973760932945,
      "grad_norm": 0.26450979709625244,
      "kl": 0.002971649169921875,
      "learning_rate": 1e-06,
      "loss": 0.0458,
      "num_tokens": 60728621.0,
      "reward": 0.5580357313156128,
      "reward_std": 0.204110249876976,
      "rewards/verify_math_reward/mean": 0.5580357313156128,
      "rewards/verify_math_reward/std": 0.49773266911506653,
      "step": 380
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1746.0,
      "completions/mean_length": 623.7098388671875,
      "completions/mean_terminated_length": 560.5772705078125,
      "completions/min_length": 152.0,
      "completions/min_terminated_length": 152.0,
      "epoch": 0.8886297376093294,
      "grad_norm": 0.27074259519577026,
      "kl": 0.00325775146484375,
      "learning_rate": 1e-06,
      "loss": 0.0264,
      "num_tokens": 60889812.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.24003231525421143,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 381
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2262.0,
      "completions/mean_length": 622.0848388671875,
      "completions/mean_terminated_length": 606.5067749023438,
      "completions/min_length": 166.0,
      "completions/min_terminated_length": 166.0,
      "epoch": 0.8909620991253644,
      "grad_norm": 0.2521023154258728,
      "kl": 0.003017425537109375,
      "learning_rate": 1e-06,
      "loss": 0.0203,
      "num_tokens": 61048655.0,
      "reward": 0.625,
      "reward_std": 0.24229323863983154,
      "rewards/verify_math_reward/mean": 0.625,
      "rewards/verify_math_reward/std": 0.4852071702480316,
      "step": 382
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3000.0,
      "completions/mean_length": 630.4152221679688,
      "completions/mean_terminated_length": 614.8744506835938,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 0.8932944606413994,
      "grad_norm": 0.279275506734848,
      "kl": 0.00324249267578125,
      "learning_rate": 1e-06,
      "loss": -0.0305,
      "num_tokens": 61211604.0,
      "reward": 0.566964328289032,
      "reward_std": 0.25851622223854065,
      "rewards/verify_math_reward/mean": 0.5669642686843872,
      "rewards/verify_math_reward/std": 0.49660524725914,
      "step": 383
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1758.0,
      "completions/mean_length": 584.9910888671875,
      "completions/mean_terminated_length": 553.3603515625,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 0.8956268221574344,
      "grad_norm": 0.24243345856666565,
      "kl": 0.0030517578125,
      "learning_rate": 1e-06,
      "loss": 0.0198,
      "num_tokens": 61363386.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.2167326956987381,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48947930335998535,
      "step": 384
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1989.0,
      "completions/mean_length": 601.34375,
      "completions/mean_terminated_length": 553.905029296875,
      "completions/min_length": 181.0,
      "completions/min_terminated_length": 181.0,
      "epoch": 0.8979591836734694,
      "grad_norm": 0.2959814965724945,
      "kl": 0.00304412841796875,
      "learning_rate": 1e-06,
      "loss": 0.0071,
      "num_tokens": 61525207.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.2672403156757355,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 385
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3316.0,
      "completions/mean_length": 669.3303833007812,
      "completions/mean_terminated_length": 622.8145141601562,
      "completions/min_length": 224.0,
      "completions/min_terminated_length": 224.0,
      "epoch": 0.9002915451895044,
      "grad_norm": 0.2504737973213196,
      "kl": 0.002788543701171875,
      "learning_rate": 1e-06,
      "loss": 0.0497,
      "num_tokens": 61702753.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.23764583468437195,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.5006201863288879,
      "step": 386
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2066.0,
      "completions/mean_length": 622.4017944335938,
      "completions/mean_terminated_length": 591.1080932617188,
      "completions/min_length": 204.0,
      "completions/min_terminated_length": 204.0,
      "epoch": 0.9026239067055394,
      "grad_norm": 0.2649012804031372,
      "kl": 0.00327301025390625,
      "learning_rate": 1e-06,
      "loss": 0.0012,
      "num_tokens": 61858083.0,
      "reward": 0.6071428656578064,
      "reward_std": 0.2083180993795395,
      "rewards/verify_math_reward/mean": 0.6071428656578064,
      "rewards/verify_math_reward/std": 0.48947930335998535,
      "step": 387
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1904.0,
      "completions/mean_length": 659.0670166015625,
      "completions/mean_terminated_length": 596.5772705078125,
      "completions/min_length": 173.0,
      "completions/min_terminated_length": 173.0,
      "epoch": 0.9049562682215744,
      "grad_norm": 0.3432794213294983,
      "kl": 0.002933502197265625,
      "learning_rate": 1e-06,
      "loss": 0.0443,
      "num_tokens": 62026362.0,
      "reward": 0.59375,
      "reward_std": 0.21356894075870514,
      "rewards/verify_math_reward/mean": 0.59375,
      "rewards/verify_math_reward/std": 0.4922322630882263,
      "step": 388
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2138.0,
      "completions/mean_length": 688.9732666015625,
      "completions/mean_terminated_length": 611.1871948242188,
      "completions/min_length": 216.0,
      "completions/min_terminated_length": 216.0,
      "epoch": 0.9072886297376094,
      "grad_norm": 0.1855618804693222,
      "kl": 0.002826690673828125,
      "learning_rate": 1e-06,
      "loss": -0.0065,
      "num_tokens": 62203636.0,
      "reward": 0.53125,
      "reward_std": 0.17330171167850494,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.5001401305198669,
      "step": 389
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2124.0,
      "completions/mean_length": 750.3303833007812,
      "completions/mean_terminated_length": 658.2476806640625,
      "completions/min_length": 123.0,
      "completions/min_terminated_length": 123.0,
      "epoch": 0.9096209912536443,
      "grad_norm": 0.23227335512638092,
      "kl": 0.002773284912109375,
      "learning_rate": 1e-06,
      "loss": 0.0604,
      "num_tokens": 62392654.0,
      "reward": 0.4375000298023224,
      "reward_std": 0.19690078496932983,
      "rewards/verify_math_reward/mean": 0.4375,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 390
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3690.0,
      "completions/mean_length": 678.8348388671875,
      "completions/mean_terminated_length": 568.6036987304688,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 0.9119533527696793,
      "grad_norm": 0.20150570571422577,
      "kl": 0.0031280517578125,
      "learning_rate": 1e-06,
      "loss": 0.0381,
      "num_tokens": 62564121.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.17555983364582062,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.5004002451896667,
      "step": 391
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1775.0,
      "completions/mean_length": 669.5357666015625,
      "completions/mean_terminated_length": 623.0226440429688,
      "completions/min_length": 169.0,
      "completions/min_terminated_length": 169.0,
      "epoch": 0.9142857142857143,
      "grad_norm": 0.22798162698745728,
      "kl": 0.002750396728515625,
      "learning_rate": 1e-06,
      "loss": 0.0184,
      "num_tokens": 62736729.0,
      "reward": 0.4955357313156128,
      "reward_std": 0.22094503045082092,
      "rewards/verify_math_reward/mean": 0.4955357015132904,
      "rewards/verify_math_reward/std": 0.5010998249053955,
      "step": 392
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1920.0,
      "completions/mean_length": 544.7767944335938,
      "completions/mean_terminated_length": 496.5701599121094,
      "completions/min_length": 135.0,
      "completions/min_terminated_length": 135.0,
      "epoch": 0.9166180758017493,
      "grad_norm": 0.26611626148223877,
      "kl": 0.003292083740234375,
      "learning_rate": 1e-06,
      "loss": -0.0128,
      "num_tokens": 62883903.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.23040536046028137,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 393
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0580357142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3714.0,
      "completions/mean_length": 812.1607666015625,
      "completions/mean_terminated_length": 609.8388671875,
      "completions/min_length": 231.0,
      "completions/min_terminated_length": 231.0,
      "epoch": 0.9189504373177843,
      "grad_norm": 0.2215123325586319,
      "kl": 0.002330780029296875,
      "learning_rate": 1e-06,
      "loss": 0.0209,
      "num_tokens": 63089571.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.23205281794071198,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 394
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3498.0,
      "completions/mean_length": 793.4642944335938,
      "completions/mean_terminated_length": 686.9308471679688,
      "completions/min_length": 222.0,
      "completions/min_terminated_length": 222.0,
      "epoch": 0.9212827988338192,
      "grad_norm": 0.21604013442993164,
      "kl": 0.00289154052734375,
      "learning_rate": 1e-06,
      "loss": 0.009,
      "num_tokens": 63286203.0,
      "reward": 0.4687500298023224,
      "reward_std": 0.18276484310626984,
      "rewards/verify_math_reward/mean": 0.46875,
      "rewards/verify_math_reward/std": 0.5001401305198669,
      "step": 395
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2077.0,
      "completions/mean_length": 583.482177734375,
      "completions/mean_terminated_length": 567.73095703125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 0.9236151603498542,
      "grad_norm": 0.2729056775569916,
      "kl": 0.003192901611328125,
      "learning_rate": 1e-06,
      "loss": 0.0881,
      "num_tokens": 63448335.0,
      "reward": 0.5625,
      "reward_std": 0.24815024435520172,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 396
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1804.0,
      "completions/mean_length": 638.8660888671875,
      "completions/mean_terminated_length": 559.93603515625,
      "completions/min_length": 177.0,
      "completions/min_terminated_length": 177.0,
      "epoch": 0.9259475218658892,
      "grad_norm": 0.2541603744029999,
      "kl": 0.00305938720703125,
      "learning_rate": 1e-06,
      "loss": 0.0335,
      "num_tokens": 63612353.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.2024611085653305,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.49460577964782715,
      "step": 397
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2310.0,
      "completions/mean_length": 642.1205444335938,
      "completions/mean_terminated_length": 595.2352905273438,
      "completions/min_length": 149.0,
      "completions/min_terminated_length": 149.0,
      "epoch": 0.9282798833819242,
      "grad_norm": 0.24556109309196472,
      "kl": 0.0027008056640625,
      "learning_rate": 1e-06,
      "loss": 0.0174,
      "num_tokens": 63779372.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.19539175927639008,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4991183578968048,
      "step": 398
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2991.0,
      "completions/mean_length": 755.2053833007812,
      "completions/mean_terminated_length": 663.2568359375,
      "completions/min_length": 144.0,
      "completions/min_terminated_length": 144.0,
      "epoch": 0.9306122448979591,
      "grad_norm": 0.22407422959804535,
      "kl": 0.002590179443359375,
      "learning_rate": 1e-06,
      "loss": 0.0852,
      "num_tokens": 63972642.0,
      "reward": 0.5535714626312256,
      "reward_std": 0.23673290014266968,
      "rewards/verify_math_reward/mean": 0.5535714030265808,
      "rewards/verify_math_reward/std": 0.49823519587516785,
      "step": 399
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3997.0,
      "completions/mean_length": 624.4642944335938,
      "completions/mean_terminated_length": 577.3394165039062,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 0.9329446064139941,
      "grad_norm": 0.2202650010585785,
      "kl": 0.002841949462890625,
      "learning_rate": 1e-06,
      "loss": 0.0229,
      "num_tokens": 64134586.0,
      "reward": 0.6026785969734192,
      "reward_std": 0.19642741978168488,
      "rewards/verify_math_reward/mean": 0.6026785969734192,
      "rewards/verify_math_reward/std": 0.4904395043849945,
      "step": 400
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 1578.0,
      "completions/max_terminated_length": 1578.0,
      "completions/mean_length": 544.9241333007812,
      "completions/mean_terminated_length": 544.9241333007812,
      "completions/min_length": 198.0,
      "completions/min_terminated_length": 198.0,
      "epoch": 0.9352769679300291,
      "grad_norm": 0.3074655830860138,
      "kl": 0.003437042236328125,
      "learning_rate": 1e-06,
      "loss": 0.0258,
      "num_tokens": 64276137.0,
      "reward": 0.6741071939468384,
      "reward_std": 0.2475440502166748,
      "rewards/verify_math_reward/mean": 0.6741071343421936,
      "rewards/verify_math_reward/std": 0.46975722908973694,
      "step": 401
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2308.0,
      "completions/mean_length": 660.6160888671875,
      "completions/mean_terminated_length": 598.154541015625,
      "completions/min_length": 160.0,
      "completions/min_terminated_length": 160.0,
      "epoch": 0.9376093294460641,
      "grad_norm": 0.20243020355701447,
      "kl": 0.002689361572265625,
      "learning_rate": 1e-06,
      "loss": 0.0038,
      "num_tokens": 64452179.0,
      "reward": 0.504464328289032,
      "reward_std": 0.15060995519161224,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5010998845100403,
      "step": 402
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3675.0,
      "completions/mean_length": 662.9553833007812,
      "completions/mean_terminated_length": 552.2119750976562,
      "completions/min_length": 132.0,
      "completions/min_terminated_length": 132.0,
      "epoch": 0.9399416909620991,
      "grad_norm": 0.25982972979545593,
      "kl": 0.0028228759765625,
      "learning_rate": 1e-06,
      "loss": -0.0118,
      "num_tokens": 64619345.0,
      "reward": 0.5714285969734192,
      "reward_std": 0.2445441037416458,
      "rewards/verify_math_reward/mean": 0.5714285969734192,
      "rewards/verify_math_reward/std": 0.49597999453544617,
      "step": 403
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1920.0,
      "completions/mean_length": 628.4330444335938,
      "completions/mean_terminated_length": 612.8834228515625,
      "completions/min_length": 88.0,
      "completions/min_terminated_length": 88.0,
      "epoch": 0.9422740524781341,
      "grad_norm": 0.253924697637558,
      "kl": 0.00443267822265625,
      "learning_rate": 1e-06,
      "loss": 0.0395,
      "num_tokens": 64779418.0,
      "reward": 0.5267857313156128,
      "reward_std": 0.2576133608818054,
      "rewards/verify_math_reward/mean": 0.5267857313156128,
      "rewards/verify_math_reward/std": 0.500400185585022,
      "step": 404
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 2335.0,
      "completions/max_terminated_length": 2335.0,
      "completions/mean_length": 614.3214721679688,
      "completions/mean_terminated_length": 614.3214721679688,
      "completions/min_length": 170.0,
      "completions/min_terminated_length": 170.0,
      "epoch": 0.9446064139941691,
      "grad_norm": 0.25928112864494324,
      "kl": 0.0035400390625,
      "learning_rate": 1e-06,
      "loss": -0.0117,
      "num_tokens": 64947074.0,
      "reward": 0.4196428656578064,
      "reward_std": 0.22094222903251648,
      "rewards/verify_math_reward/mean": 0.4196428656578064,
      "rewards/verify_math_reward/std": 0.49460577964782715,
      "step": 405
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1774.0,
      "completions/mean_length": 612.875,
      "completions/mean_terminated_length": 565.5927734375,
      "completions/min_length": 130.0,
      "completions/min_terminated_length": 130.0,
      "epoch": 0.9469387755102041,
      "grad_norm": 0.2391643524169922,
      "kl": 0.00316619873046875,
      "learning_rate": 1e-06,
      "loss": 0.0207,
      "num_tokens": 65105822.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.18262921273708344,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.49460574984550476,
      "step": 406
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1488.0,
      "completions/mean_length": 568.1160888671875,
      "completions/mean_terminated_length": 536.3333740234375,
      "completions/min_length": 109.0,
      "completions/min_terminated_length": 109.0,
      "epoch": 0.9492711370262391,
      "grad_norm": 0.2419482320547104,
      "kl": 0.003387451171875,
      "learning_rate": 1e-06,
      "loss": 0.027,
      "num_tokens": 65263992.0,
      "reward": 0.65625,
      "reward_std": 0.1892252415418625,
      "rewards/verify_math_reward/mean": 0.65625,
      "rewards/verify_math_reward/std": 0.4760226309299469,
      "step": 407
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2297.0,
      "completions/mean_length": 648.419677734375,
      "completions/mean_terminated_length": 601.6199340820312,
      "completions/min_length": 142.0,
      "completions/min_terminated_length": 142.0,
      "epoch": 0.9516034985422741,
      "grad_norm": 0.20338350534439087,
      "kl": 0.002971649169921875,
      "learning_rate": 1e-06,
      "loss": 0.0419,
      "num_tokens": 65426726.0,
      "reward": 0.5446428656578064,
      "reward_std": 0.17720730602741241,
      "rewards/verify_math_reward/mean": 0.5446428656578064,
      "rewards/verify_math_reward/std": 0.4991183578968048,
      "step": 408
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1894.0,
      "completions/mean_length": 640.0402221679688,
      "completions/mean_terminated_length": 608.9053955078125,
      "completions/min_length": 92.0,
      "completions/min_terminated_length": 92.0,
      "epoch": 0.953935860058309,
      "grad_norm": 0.23347207903862,
      "kl": 0.003124237060546875,
      "learning_rate": 1e-06,
      "loss": 0.0371,
      "num_tokens": 65593551.0,
      "reward": 0.5625,
      "reward_std": 0.20124875009059906,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 409
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0357142857142857,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2335.0,
      "completions/mean_length": 710.607177734375,
      "completions/mean_terminated_length": 585.2222290039062,
      "completions/min_length": 187.0,
      "completions/min_terminated_length": 187.0,
      "epoch": 0.956268221574344,
      "grad_norm": 0.16663672029972076,
      "kl": 0.0030059814453125,
      "learning_rate": 1e-06,
      "loss": 0.0122,
      "num_tokens": 65770463.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.15225742757320404,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.5006201863288879,
      "step": 410
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3761.0,
      "completions/mean_length": 804.919677734375,
      "completions/mean_terminated_length": 775.270263671875,
      "completions/min_length": 162.0,
      "completions/min_terminated_length": 162.0,
      "epoch": 0.958600583090379,
      "grad_norm": 0.21586303412914276,
      "kl": 0.002864837646484375,
      "learning_rate": 1e-06,
      "loss": -0.0089,
      "num_tokens": 65971501.0,
      "reward": 0.4687500298023224,
      "reward_std": 0.19525612890720367,
      "rewards/verify_math_reward/mean": 0.46875,
      "rewards/verify_math_reward/std": 0.5001401305198669,
      "step": 411
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3432.0,
      "completions/mean_length": 623.107177734375,
      "completions/mean_terminated_length": 607.53369140625,
      "completions/min_length": 150.0,
      "completions/min_terminated_length": 150.0,
      "epoch": 0.960932944606414,
      "grad_norm": 0.19395920634269714,
      "kl": 0.0031585693359375,
      "learning_rate": 1e-06,
      "loss": 0.0115,
      "num_tokens": 66136613.0,
      "reward": 0.4821428656578064,
      "reward_std": 0.1717798113822937,
      "rewards/verify_math_reward/mean": 0.4821428656578064,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 412
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1551.0,
      "completions/mean_length": 620.6517944335938,
      "completions/mean_terminated_length": 589.3423461914062,
      "completions/min_length": 194.0,
      "completions/min_terminated_length": 194.0,
      "epoch": 0.963265306122449,
      "grad_norm": 0.2274623066186905,
      "kl": 0.0033721923828125,
      "learning_rate": 1e-06,
      "loss": 0.021,
      "num_tokens": 66300647.0,
      "reward": 0.486607164144516,
      "reward_std": 0.20216168463230133,
      "rewards/verify_math_reward/mean": 0.4866071343421936,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 413
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1640.0,
      "completions/mean_length": 594.7857666015625,
      "completions/mean_terminated_length": 563.2432861328125,
      "completions/min_length": 194.0,
      "completions/min_terminated_length": 194.0,
      "epoch": 0.965597667638484,
      "grad_norm": 0.26743146777153015,
      "kl": 0.002994537353515625,
      "learning_rate": 1e-06,
      "loss": 0.0261,
      "num_tokens": 66453927.0,
      "reward": 0.6830357313156128,
      "reward_std": 0.24333453178405762,
      "rewards/verify_math_reward/mean": 0.6830357313156128,
      "rewards/verify_math_reward/std": 0.4663354456424713,
      "step": 414
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3358.0,
      "completions/mean_length": 651.0535888671875,
      "completions/mean_terminated_length": 604.2896118164062,
      "completions/min_length": 85.0,
      "completions/min_terminated_length": 85.0,
      "epoch": 0.967930029154519,
      "grad_norm": 0.2531168758869171,
      "kl": 0.002796173095703125,
      "learning_rate": 1e-06,
      "loss": -0.0073,
      "num_tokens": 66620363.0,
      "reward": 0.504464328289032,
      "reward_std": 0.25356483459472656,
      "rewards/verify_math_reward/mean": 0.5044642686843872,
      "rewards/verify_math_reward/std": 0.5010998845100403,
      "step": 415
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2555.0,
      "completions/mean_length": 543.40625,
      "completions/mean_terminated_length": 527.475341796875,
      "completions/min_length": 134.0,
      "completions/min_terminated_length": 134.0,
      "epoch": 0.970262390670554,
      "grad_norm": 0.30833593010902405,
      "kl": 0.00341796875,
      "learning_rate": 1e-06,
      "loss": 0.0459,
      "num_tokens": 66769374.0,
      "reward": 0.6205357313156128,
      "reward_std": 0.27639955282211304,
      "rewards/verify_math_reward/mean": 0.6205357313156128,
      "rewards/verify_math_reward/std": 0.4863404929637909,
      "step": 416
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4086.0,
      "completions/mean_length": 561.4285888671875,
      "completions/mean_terminated_length": 529.5855712890625,
      "completions/min_length": 199.0,
      "completions/min_terminated_length": 199.0,
      "epoch": 0.972594752186589,
      "grad_norm": 0.2585616707801819,
      "kl": 0.0034637451171875,
      "learning_rate": 1e-06,
      "loss": 0.0176,
      "num_tokens": 66920702.0,
      "reward": 0.53125,
      "reward_std": 0.2125304490327835,
      "rewards/verify_math_reward/mean": 0.53125,
      "rewards/verify_math_reward/std": 0.5001401305198669,
      "step": 417
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 4018.0,
      "completions/mean_length": 698.6830444335938,
      "completions/mean_terminated_length": 605.1788940429688,
      "completions/min_length": 175.0,
      "completions/min_terminated_length": 175.0,
      "epoch": 0.9749271137026239,
      "grad_norm": 0.2406994253396988,
      "kl": 0.002899169921875,
      "learning_rate": 1e-06,
      "loss": 0.041,
      "num_tokens": 67100599.0,
      "reward": 0.4598214626312256,
      "reward_std": 0.21777845919132233,
      "rewards/verify_math_reward/mean": 0.4598214328289032,
      "rewards/verify_math_reward/std": 0.49949926137924194,
      "step": 418
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.008928571428571397,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2071.0,
      "completions/mean_length": 579.0089721679688,
      "completions/mean_terminated_length": 547.3243408203125,
      "completions/min_length": 137.0,
      "completions/min_terminated_length": 137.0,
      "epoch": 0.9772594752186589,
      "grad_norm": 0.30493125319480896,
      "kl": 0.003597259521484375,
      "learning_rate": 1e-06,
      "loss": 0.0291,
      "num_tokens": 67250985.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.26663413643836975,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135848045349,
      "step": 419
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1407.0,
      "completions/mean_length": 474.0535888671875,
      "completions/mean_terminated_length": 457.8116760253906,
      "completions/min_length": 113.0,
      "completions/min_terminated_length": 113.0,
      "epoch": 0.9795918367346939,
      "grad_norm": 0.3409460186958313,
      "kl": 0.00598907470703125,
      "learning_rate": 1e-06,
      "loss": 0.0225,
      "num_tokens": 67375421.0,
      "reward": 0.5803571939468384,
      "reward_std": 0.2634703516960144,
      "rewards/verify_math_reward/mean": 0.5803571343421936,
      "rewards/verify_math_reward/std": 0.49460574984550476,
      "step": 420
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.022321428571428603,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2122.0,
      "completions/mean_length": 596.3616333007812,
      "completions/mean_terminated_length": 516.461181640625,
      "completions/min_length": 180.0,
      "completions/min_terminated_length": 180.0,
      "epoch": 0.9819241982507289,
      "grad_norm": 0.1891840696334839,
      "kl": 0.0029144287109375,
      "learning_rate": 1e-06,
      "loss": 0.025,
      "num_tokens": 67530294.0,
      "reward": 0.5848214626312256,
      "reward_std": 0.16592560708522797,
      "rewards/verify_math_reward/mean": 0.5848214030265808,
      "rewards/verify_math_reward/std": 0.49385640025138855,
      "step": 421
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0,
      "completions/max_length": 2185.0,
      "completions/max_terminated_length": 2185.0,
      "completions/mean_length": 633.7723388671875,
      "completions/mean_terminated_length": 633.7723388671875,
      "completions/min_length": 136.0,
      "completions/min_terminated_length": 136.0,
      "epoch": 0.9842565597667638,
      "grad_norm": 0.2414131462574005,
      "kl": 0.003475189208984375,
      "learning_rate": 1e-06,
      "loss": 0.0227,
      "num_tokens": 67693523.0,
      "reward": 0.5133928656578064,
      "reward_std": 0.163971409201622,
      "rewards/verify_math_reward/mean": 0.5133928656578064,
      "rewards/verify_math_reward/std": 0.5009400248527527,
      "step": 422
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.013392857142857095,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 1701.0,
      "completions/mean_length": 559.9330444335938,
      "completions/mean_terminated_length": 511.9321594238281,
      "completions/min_length": 147.0,
      "completions/min_terminated_length": 147.0,
      "epoch": 0.9865889212827988,
      "grad_norm": 0.2653488516807556,
      "kl": 0.00396728515625,
      "learning_rate": 1e-06,
      "loss": 0.0089,
      "num_tokens": 67838772.0,
      "reward": 0.5625,
      "reward_std": 0.22289641201496124,
      "rewards/verify_math_reward/mean": 0.5625,
      "rewards/verify_math_reward/std": 0.49718940258026123,
      "step": 423
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0267857142857143,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3941.0,
      "completions/mean_length": 705.4955444335938,
      "completions/mean_terminated_length": 612.1788940429688,
      "completions/min_length": 182.0,
      "completions/min_terminated_length": 182.0,
      "epoch": 0.9889212827988338,
      "grad_norm": 0.20497190952301025,
      "kl": 0.003063201904296875,
      "learning_rate": 1e-06,
      "loss": 0.0279,
      "num_tokens": 68016619.0,
      "reward": 0.5223214626312256,
      "reward_std": 0.19404374063014984,
      "rewards/verify_math_reward/mean": 0.5223214030265808,
      "rewards/verify_math_reward/std": 0.5006201863288879,
      "step": 424
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.03125,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3048.0,
      "completions/mean_length": 651.3482666015625,
      "completions/mean_terminated_length": 540.2304077148438,
      "completions/min_length": 117.0,
      "completions/min_terminated_length": 117.0,
      "epoch": 0.9912536443148688,
      "grad_norm": 0.24258698523044586,
      "kl": 0.00313568115234375,
      "learning_rate": 1e-06,
      "loss": 0.035,
      "num_tokens": 68190953.0,
      "reward": 0.4642857313156128,
      "reward_std": 0.20306451618671417,
      "rewards/verify_math_reward/mean": 0.4642857015132904,
      "rewards/verify_math_reward/std": 0.49983978271484375,
      "step": 425
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.017857142857142905,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 3861.0,
      "completions/mean_length": 570.625,
      "completions/mean_terminated_length": 506.5272521972656,
      "completions/min_length": 168.0,
      "completions/min_terminated_length": 168.0,
      "epoch": 0.9935860058309038,
      "grad_norm": 0.29159656167030334,
      "kl": 0.00347900390625,
      "learning_rate": 1e-06,
      "loss": 0.043,
      "num_tokens": 68338653.0,
      "reward": 0.5491071939468384,
      "reward_std": 0.23296575248241425,
      "rewards/verify_math_reward/mean": 0.5491071343421936,
      "rewards/verify_math_reward/std": 0.49869707226753235,
      "step": 426
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.004464285714285698,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2233.0,
      "completions/mean_length": 618.0982666015625,
      "completions/mean_terminated_length": 602.5022583007812,
      "completions/min_length": 187.0,
      "completions/min_terminated_length": 187.0,
      "epoch": 0.9959183673469387,
      "grad_norm": 0.2417595386505127,
      "kl": 0.004085540771484375,
      "learning_rate": 1e-06,
      "loss": 0.0389,
      "num_tokens": 68498275.0,
      "reward": 0.5758928656578064,
      "reward_std": 0.23387587070465088,
      "rewards/verify_math_reward/mean": 0.5758928656578064,
      "rewards/verify_math_reward/std": 0.4953135550022125,
      "step": 427
    },
    {
      "clip_ratio/high_max": 0.0,
      "clip_ratio/high_mean": 0.0,
      "clip_ratio/low_mean": 0.0,
      "clip_ratio/low_min": 0.0,
      "clip_ratio/region_mean": 0.0,
      "completions/clipped_ratio": 0.0234375,
      "completions/max_length": 4096.0,
      "completions/max_terminated_length": 2932.0,
      "completions/mean_length": 703.9296875,
      "completions/mean_terminated_length": 622.52001953125,
      "completions/min_length": 146.0,
      "completions/min_terminated_length": 146.0,
      "epoch": 0.9982507288629737,
      "grad_norm": 0.25353655219078064,
      "kl": 0.00315093994140625,
      "learning_rate": 1e-06,
      "loss": 0.0298,
      "num_tokens": 68666795.0,
      "reward": 0.5178571939468384,
      "reward_std": 0.22424164414405823,
      "rewards/verify_math_reward/mean": 0.5178571343421936,
      "rewards/verify_math_reward/std": 0.5008001327514648,
      "step": 428
    },
    {
      "epoch": 0.9982507288629737,
      "step": 428,
      "total_flos": 0.0,
      "train_loss": 0.025071051198389125,
      "train_runtime": 12494.3327,
      "train_samples_per_second": 0.96,
      "train_steps_per_second": 0.034
    }
  ],
  "logging_steps": 1,
  "max_steps": 428,
  "num_input_tokens_seen": 68666795,
  "num_train_epochs": 1,
  "save_steps": 32,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}