|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0074413989830088055, |
|
"eval_steps": 500, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 2.480466327669602e-05, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"num_tokens": 3234.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 4.960932655339204e-05, |
|
"grad_norm": 3.5991625785827637, |
|
"learning_rate": 3.3333333333333334e-08, |
|
"loss": 0.0002, |
|
"num_tokens": 6453.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 2 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 7.441398983008806e-05, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.666666666666667e-08, |
|
"loss": 0.0, |
|
"num_tokens": 9684.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 9.921865310678408e-05, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-07, |
|
"loss": 0.0, |
|
"num_tokens": 13001.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.00012402331638348009, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3333333333333334e-07, |
|
"loss": 0.0, |
|
"num_tokens": 16442.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 768.0, |
|
"completions/max_terminated_length": 768.0, |
|
"completions/mean_length": 714.5, |
|
"completions/mean_terminated_length": 714.5, |
|
"completions/min_length": 661.0, |
|
"completions/min_terminated_length": 661.0, |
|
"epoch": 0.00014882797966017612, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"loss": 0.0, |
|
"num_tokens": 18685.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1037.0, |
|
"completions/max_terminated_length": 1037.0, |
|
"completions/mean_length": 994.5, |
|
"completions/mean_terminated_length": 994.5, |
|
"completions/min_length": 952.0, |
|
"completions/min_terminated_length": 952.0, |
|
"epoch": 0.00017363264293687212, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2e-07, |
|
"loss": 0.0, |
|
"num_tokens": 21594.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 7 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 786.0, |
|
"completions/max_terminated_length": 786.0, |
|
"completions/mean_length": 772.5, |
|
"completions/mean_terminated_length": 772.5, |
|
"completions/min_length": 759.0, |
|
"completions/min_terminated_length": 759.0, |
|
"epoch": 0.00019843730621356816, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.3333333333333333e-07, |
|
"loss": 0.0, |
|
"num_tokens": 24151.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 8 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.00022324196949026416, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6666666666666667e-07, |
|
"loss": 0.0, |
|
"num_tokens": 27465.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 9 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1113.0, |
|
"completions/max_terminated_length": 1113.0, |
|
"completions/mean_length": 1082.5, |
|
"completions/mean_terminated_length": 1082.5, |
|
"completions/min_length": 1052.0, |
|
"completions/min_terminated_length": 1052.0, |
|
"epoch": 0.00024804663276696017, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3e-07, |
|
"loss": 0.0, |
|
"num_tokens": 30556.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.00027285129604365623, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": 0.0, |
|
"num_tokens": 33788.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 11 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1154.0, |
|
"completions/max_terminated_length": 1154.0, |
|
"completions/mean_length": 1145.5, |
|
"completions/mean_terminated_length": 1145.5, |
|
"completions/min_length": 1137.0, |
|
"completions/min_terminated_length": 1137.0, |
|
"epoch": 0.00029765595932035224, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.666666666666666e-07, |
|
"loss": 0.0, |
|
"num_tokens": 37019.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 12 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.00032246062259704824, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4e-07, |
|
"loss": 0.0, |
|
"num_tokens": 40204.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 13 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 765.0, |
|
"completions/max_terminated_length": 765.0, |
|
"completions/mean_length": 732.5, |
|
"completions/mean_terminated_length": 732.5, |
|
"completions/min_length": 700.0, |
|
"completions/min_terminated_length": 700.0, |
|
"epoch": 0.00034726528587374425, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.3333333333333335e-07, |
|
"loss": 0.0, |
|
"num_tokens": 42471.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 14 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0003720699491504403, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.6666666666666666e-07, |
|
"loss": 0.0, |
|
"num_tokens": 45810.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 828.0, |
|
"completions/max_terminated_length": 828.0, |
|
"completions/mean_length": 745.0, |
|
"completions/mean_terminated_length": 745.0, |
|
"completions/min_length": 662.0, |
|
"completions/min_terminated_length": 662.0, |
|
"epoch": 0.0003968746124271363, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"num_tokens": 48120.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 16 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1136.0, |
|
"completions/mean_terminated_length": 1136.0, |
|
"completions/min_length": 1112.0, |
|
"completions/min_terminated_length": 1112.0, |
|
"epoch": 0.0004216792757038323, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.333333333333333e-07, |
|
"loss": 0.0, |
|
"num_tokens": 51370.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 17 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1104.5, |
|
"completions/mean_terminated_length": 1104.5, |
|
"completions/min_length": 1048.0, |
|
"completions/min_terminated_length": 1048.0, |
|
"epoch": 0.00044648393898052833, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.666666666666666e-07, |
|
"loss": 0.0, |
|
"num_tokens": 54403.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1050.0, |
|
"completions/max_terminated_length": 1050.0, |
|
"completions/mean_length": 1049.5, |
|
"completions/mean_terminated_length": 1049.5, |
|
"completions/min_length": 1049.0, |
|
"completions/min_terminated_length": 1049.0, |
|
"epoch": 0.00047128860225722433, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6e-07, |
|
"loss": 0.0, |
|
"num_tokens": 57450.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 19 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1081.0, |
|
"completions/max_terminated_length": 1081.0, |
|
"completions/mean_length": 963.0, |
|
"completions/mean_terminated_length": 963.0, |
|
"completions/min_length": 845.0, |
|
"completions/min_terminated_length": 845.0, |
|
"epoch": 0.0004960932655339203, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.333333333333332e-07, |
|
"loss": 0.0, |
|
"num_tokens": 60254.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1117.0, |
|
"completions/max_terminated_length": 1117.0, |
|
"completions/mean_length": 937.0, |
|
"completions/mean_terminated_length": 937.0, |
|
"completions/min_length": 757.0, |
|
"completions/min_terminated_length": 757.0, |
|
"epoch": 0.0005208979288106164, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.666666666666666e-07, |
|
"loss": 0.0, |
|
"num_tokens": 62956.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 21 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1034.0, |
|
"completions/max_terminated_length": 1034.0, |
|
"completions/mean_length": 919.0, |
|
"completions/mean_terminated_length": 919.0, |
|
"completions/min_length": 804.0, |
|
"completions/min_terminated_length": 804.0, |
|
"epoch": 0.0005457025920873125, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7e-07, |
|
"loss": 0.0, |
|
"num_tokens": 65606.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 22 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 715.0, |
|
"completions/max_terminated_length": 715.0, |
|
"completions/mean_length": 645.0, |
|
"completions/mean_terminated_length": 645.0, |
|
"completions/min_length": 575.0, |
|
"completions/min_terminated_length": 575.0, |
|
"epoch": 0.0005705072553640084, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.333333333333332e-07, |
|
"loss": 0.0, |
|
"num_tokens": 67746.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 23 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0005953119186407045, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.666666666666667e-07, |
|
"loss": 0.0, |
|
"num_tokens": 70995.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1147.0, |
|
"completions/mean_terminated_length": 1147.0, |
|
"completions/min_length": 1135.0, |
|
"completions/min_terminated_length": 1135.0, |
|
"epoch": 0.0006201165819174004, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8e-07, |
|
"loss": 0.0, |
|
"num_tokens": 74153.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 725.0, |
|
"completions/max_terminated_length": 725.0, |
|
"completions/mean_length": 674.0, |
|
"completions/mean_terminated_length": 674.0, |
|
"completions/min_length": 623.0, |
|
"completions/min_terminated_length": 623.0, |
|
"epoch": 0.0006449212451940965, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 0.0, |
|
"num_tokens": 76333.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 26 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1098.5, |
|
"completions/mean_terminated_length": 1098.5, |
|
"completions/min_length": 1037.0, |
|
"completions/min_terminated_length": 1037.0, |
|
"epoch": 0.0006697259084707925, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.666666666666667e-07, |
|
"loss": 0.0, |
|
"num_tokens": 79358.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 27 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1111.0, |
|
"completions/mean_terminated_length": 1111.0, |
|
"completions/min_length": 1062.0, |
|
"completions/min_terminated_length": 1062.0, |
|
"epoch": 0.0006945305717474885, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9e-07, |
|
"loss": 0.0, |
|
"num_tokens": 82644.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 28 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1118.5, |
|
"completions/mean_terminated_length": 1118.5, |
|
"completions/min_length": 1078.0, |
|
"completions/min_terminated_length": 1078.0, |
|
"epoch": 0.0007193352350241846, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.333333333333333e-07, |
|
"loss": 0.0, |
|
"num_tokens": 85839.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 29 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0007441398983008806, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.666666666666666e-07, |
|
"loss": 0.0, |
|
"num_tokens": 89189.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1048.0, |
|
"completions/max_terminated_length": 1048.0, |
|
"completions/mean_length": 1047.5, |
|
"completions/mean_terminated_length": 1047.5, |
|
"completions/min_length": 1047.0, |
|
"completions/min_terminated_length": 1047.0, |
|
"epoch": 0.0007689445615775766, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0, |
|
"num_tokens": 92180.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 31 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0007937492248542726, |
|
"grad_norm": 4.284660816192627, |
|
"learning_rate": 9.99966154001881e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 95421.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 32 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0008185538881309686, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.998646205897307e-07, |
|
"loss": 0.0, |
|
"num_tokens": 98827.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 33 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0008433585514076646, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.996954135095478e-07, |
|
"loss": 0.0, |
|
"num_tokens": 102050.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 34 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1113.5, |
|
"completions/mean_terminated_length": 1113.5, |
|
"completions/min_length": 1066.0, |
|
"completions/min_terminated_length": 1066.0, |
|
"epoch": 0.0008681632146843607, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.994585556692624e-07, |
|
"loss": 0.0, |
|
"num_tokens": 105141.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 965.0, |
|
"completions/mean_terminated_length": 965.0, |
|
"completions/min_length": 769.0, |
|
"completions/min_terminated_length": 769.0, |
|
"epoch": 0.0008929678779610567, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.991540791356342e-07, |
|
"loss": 0.0, |
|
"num_tokens": 108287.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1046.0, |
|
"completions/max_terminated_length": 1046.0, |
|
"completions/mean_length": 1046.0, |
|
"completions/mean_terminated_length": 1046.0, |
|
"completions/min_length": 1046.0, |
|
"completions/min_terminated_length": 1046.0, |
|
"epoch": 0.0009177725412377527, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.98782025129912e-07, |
|
"loss": 0.0, |
|
"num_tokens": 111249.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 37 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1042.0, |
|
"completions/max_terminated_length": 1042.0, |
|
"completions/mean_length": 1041.5, |
|
"completions/mean_terminated_length": 1041.5, |
|
"completions/min_length": 1041.0, |
|
"completions/min_terminated_length": 1041.0, |
|
"epoch": 0.0009425772045144487, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.983424440222529e-07, |
|
"loss": 0.0, |
|
"num_tokens": 114208.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0009673818677911447, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.978353953249021e-07, |
|
"loss": 0.0, |
|
"num_tokens": 117375.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.0009921865310678407, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.972609476841365e-07, |
|
"loss": 0.0, |
|
"num_tokens": 120777.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0010169911943445368, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.966191788709714e-07, |
|
"loss": 0.0, |
|
"num_tokens": 124034.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 41 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0010417958576212328, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.959101757706308e-07, |
|
"loss": 0.0, |
|
"num_tokens": 127235.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1050.0, |
|
"completions/max_terminated_length": 1050.0, |
|
"completions/mean_length": 1050.0, |
|
"completions/mean_terminated_length": 1050.0, |
|
"completions/min_length": 1050.0, |
|
"completions/min_terminated_length": 1050.0, |
|
"epoch": 0.0010666005208979288, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.95134034370785e-07, |
|
"loss": 0.0, |
|
"num_tokens": 130287.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 43 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.001091405184174625, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.942908597485558e-07, |
|
"loss": 0.0, |
|
"num_tokens": 133547.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 44 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1103.5, |
|
"completions/mean_terminated_length": 1103.5, |
|
"completions/min_length": 1046.0, |
|
"completions/min_terminated_length": 1046.0, |
|
"epoch": 0.0011162098474513209, |
|
"grad_norm": 4.441093921661377, |
|
"learning_rate": 9.933807660562896e-07, |
|
"loss": 0.0261, |
|
"num_tokens": 136646.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0011410145107280168, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.92403876506104e-07, |
|
"loss": 0.0, |
|
"num_tokens": 139961.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 46 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1159.0, |
|
"completions/mean_terminated_length": 1159.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0011658191740047128, |
|
"grad_norm": 5.3396992683410645, |
|
"learning_rate": 9.913603233532067e-07, |
|
"loss": 0.0, |
|
"num_tokens": 143307.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 47 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1116.5, |
|
"completions/mean_terminated_length": 1116.5, |
|
"completions/min_length": 1073.0, |
|
"completions/min_terminated_length": 1073.0, |
|
"epoch": 0.001190623837281409, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.902502478779896e-07, |
|
"loss": 0.0, |
|
"num_tokens": 146460.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1057.0, |
|
"completions/max_terminated_length": 1057.0, |
|
"completions/mean_length": 1057.0, |
|
"completions/mean_terminated_length": 1057.0, |
|
"completions/min_length": 1057.0, |
|
"completions/min_terminated_length": 1057.0, |
|
"epoch": 0.001215428500558105, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.890738003669027e-07, |
|
"loss": 0.0, |
|
"num_tokens": 149482.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 49 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0012402331638348009, |
|
"grad_norm": 4.062455177307129, |
|
"learning_rate": 9.878311400921072e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 152685.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.001265037827111497, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.865224352899118e-07, |
|
"loss": 0.0, |
|
"num_tokens": 156049.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 51 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.001289842490388193, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.851478631379982e-07, |
|
"loss": 0.0, |
|
"num_tokens": 159381.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 52 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.001314647153664889, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.837076097314318e-07, |
|
"loss": 0.0, |
|
"num_tokens": 162618.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 53 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.001339451816941585, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.822018700574694e-07, |
|
"loss": 0.0, |
|
"num_tokens": 165894.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 54 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1113.0, |
|
"completions/mean_terminated_length": 1113.0, |
|
"completions/min_length": 1065.0, |
|
"completions/min_terminated_length": 1065.0, |
|
"epoch": 0.001364256480218281, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.806308479691594e-07, |
|
"loss": 0.0, |
|
"num_tokens": 169096.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.001389061143494977, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.789947561577443e-07, |
|
"loss": 0.0, |
|
"num_tokens": 172307.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 56 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0014138658067716732, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.77293816123866e-07, |
|
"loss": 0.0, |
|
"num_tokens": 175836.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 57 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0014386704700483691, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.755282581475767e-07, |
|
"loss": 0.0, |
|
"num_tokens": 179287.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 58 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.001463475133325065, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.736983212571645e-07, |
|
"loss": 0.0, |
|
"num_tokens": 182440.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 59 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0014882797966017612, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.718042531967916e-07, |
|
"loss": 0.0, |
|
"num_tokens": 185618.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0015130844598784572, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.698463103929541e-07, |
|
"loss": 0.0, |
|
"num_tokens": 188861.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 61 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1064.0, |
|
"completions/max_terminated_length": 1064.0, |
|
"completions/mean_length": 1062.5, |
|
"completions/mean_terminated_length": 1062.5, |
|
"completions/min_length": 1061.0, |
|
"completions/min_terminated_length": 1061.0, |
|
"epoch": 0.0015378891231551531, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.678247579197658e-07, |
|
"loss": 0.0, |
|
"num_tokens": 191992.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 62 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1159.0, |
|
"completions/mean_terminated_length": 1159.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.001562693786431849, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.657398694630712e-07, |
|
"loss": 0.0, |
|
"num_tokens": 195192.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 63 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0015874984497085453, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.635919272833937e-07, |
|
"loss": 0.0, |
|
"num_tokens": 198417.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 64 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1152.0, |
|
"completions/mean_terminated_length": 1152.0, |
|
"completions/min_length": 1143.0, |
|
"completions/min_terminated_length": 1143.0, |
|
"epoch": 0.0016123031129852412, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.613812221777212e-07, |
|
"loss": 0.0, |
|
"num_tokens": 201671.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 65 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1132.0, |
|
"completions/max_terminated_length": 1132.0, |
|
"completions/mean_length": 1113.5, |
|
"completions/mean_terminated_length": 1113.5, |
|
"completions/min_length": 1095.0, |
|
"completions/min_terminated_length": 1095.0, |
|
"epoch": 0.0016371077762619372, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.591080534401371e-07, |
|
"loss": 0.0, |
|
"num_tokens": 204726.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 66 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0016619124395386333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.567727288213004e-07, |
|
"loss": 0.0, |
|
"num_tokens": 208104.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 67 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1117.0, |
|
"completions/mean_terminated_length": 1117.0, |
|
"completions/min_length": 1074.0, |
|
"completions/min_terminated_length": 1074.0, |
|
"epoch": 0.0016867171028153293, |
|
"grad_norm": 5.227029323577881, |
|
"learning_rate": 9.543755644867822e-07, |
|
"loss": 0.0192, |
|
"num_tokens": 211292.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 68 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.0017115217660920252, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.519168849742602e-07, |
|
"loss": 0.0, |
|
"num_tokens": 214614.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 69 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1065.0, |
|
"completions/max_terminated_length": 1065.0, |
|
"completions/mean_length": 1065.0, |
|
"completions/mean_terminated_length": 1065.0, |
|
"completions/min_length": 1065.0, |
|
"completions/min_terminated_length": 1065.0, |
|
"epoch": 0.0017363264293687214, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.493970231495834e-07, |
|
"loss": 0.0, |
|
"num_tokens": 217596.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0017611310926454174, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.468163201617061e-07, |
|
"loss": 0.0, |
|
"num_tokens": 220835.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 71 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0017859357559221133, |
|
"grad_norm": 5.141083240509033, |
|
"learning_rate": 9.44175125396502e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 224422.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 72 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1098.0, |
|
"completions/mean_terminated_length": 1098.0, |
|
"completions/min_length": 1036.0, |
|
"completions/min_terminated_length": 1036.0, |
|
"epoch": 0.0018107404191988095, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.414737964294634e-07, |
|
"loss": 0.0, |
|
"num_tokens": 227472.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 73 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0018355450824755054, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.387126989772909e-07, |
|
"loss": 0.0, |
|
"num_tokens": 230768.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 74 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0018603497457522014, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.358922068483811e-07, |
|
"loss": 0.0, |
|
"num_tokens": 234020.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0018851544090288973, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.330127018922193e-07, |
|
"loss": 0.0, |
|
"num_tokens": 237325.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 76 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0019099590723055935, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.300745739476828e-07, |
|
"loss": 0.0, |
|
"num_tokens": 240813.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 77 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1083.0, |
|
"completions/max_terminated_length": 1083.0, |
|
"completions/mean_length": 1065.5, |
|
"completions/mean_terminated_length": 1065.5, |
|
"completions/min_length": 1048.0, |
|
"completions/min_terminated_length": 1048.0, |
|
"epoch": 0.0019347637355822895, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.270782207902627e-07, |
|
"loss": 0.0, |
|
"num_tokens": 243864.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 78 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1111.5, |
|
"completions/mean_terminated_length": 1111.5, |
|
"completions/min_length": 1062.0, |
|
"completions/min_terminated_length": 1062.0, |
|
"epoch": 0.0019595683988589854, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.240240480782129e-07, |
|
"loss": 0.0, |
|
"num_tokens": 246915.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 79 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0019843730621356814, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.209124692976287e-07, |
|
"loss": 0.0, |
|
"num_tokens": 250094.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0020091777254123773, |
|
"grad_norm": 3.792428493499756, |
|
"learning_rate": 9.177439057064682e-07, |
|
"loss": 0.0, |
|
"num_tokens": 253312.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 81 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 652.0, |
|
"completions/max_terminated_length": 652.0, |
|
"completions/mean_length": 652.0, |
|
"completions/mean_terminated_length": 652.0, |
|
"completions/min_length": 652.0, |
|
"completions/min_terminated_length": 652.0, |
|
"epoch": 0.0020339823886890737, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.145187862775208e-07, |
|
"loss": 0.0, |
|
"num_tokens": 255432.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 82 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0020587870519657697, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.112375476403311e-07, |
|
"loss": 0.0, |
|
"num_tokens": 258637.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 83 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0020835917152424656, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.079006340220861e-07, |
|
"loss": 0.0, |
|
"num_tokens": 261909.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 84 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1104.0, |
|
"completions/mean_terminated_length": 1104.0, |
|
"completions/min_length": 1048.0, |
|
"completions/min_terminated_length": 1048.0, |
|
"epoch": 0.0021083963785191616, |
|
"grad_norm": 4.88740348815918, |
|
"learning_rate": 9.045084971874737e-07, |
|
"loss": -0.0254, |
|
"num_tokens": 264967.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 85 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0021332010417958575, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.010615963775219e-07, |
|
"loss": 0.0, |
|
"num_tokens": 268191.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 86 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0021580057050725535, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.975603982474238e-07, |
|
"loss": 0.0, |
|
"num_tokens": 271528.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 87 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1101.5, |
|
"completions/mean_terminated_length": 1101.5, |
|
"completions/min_length": 1043.0, |
|
"completions/min_terminated_length": 1043.0, |
|
"epoch": 0.00218281036834925, |
|
"grad_norm": 3.760263204574585, |
|
"learning_rate": 8.940053768033608e-07, |
|
"loss": 0.0266, |
|
"num_tokens": 274553.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 88 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.002207615031625946, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.903970133383296e-07, |
|
"loss": 0.0, |
|
"num_tokens": 277835.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 89 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1041.0, |
|
"completions/max_terminated_length": 1041.0, |
|
"completions/mean_length": 1039.0, |
|
"completions/mean_terminated_length": 1039.0, |
|
"completions/min_length": 1037.0, |
|
"completions/min_terminated_length": 1037.0, |
|
"epoch": 0.0022324196949026417, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.86735796366982e-07, |
|
"loss": 0.0, |
|
"num_tokens": 280845.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1059.5, |
|
"completions/mean_terminated_length": 1059.5, |
|
"completions/min_length": 959.0, |
|
"completions/min_terminated_length": 959.0, |
|
"epoch": 0.0022572243581793377, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.83022221559489e-07, |
|
"loss": 0.0, |
|
"num_tokens": 283852.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 91 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0022820290214560337, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.792567916744345e-07, |
|
"loss": 0.0, |
|
"num_tokens": 287084.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 92 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0023068336847327296, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.754400164907496e-07, |
|
"loss": 0.0, |
|
"num_tokens": 290353.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 93 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0023316383480094256, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.71572412738697e-07, |
|
"loss": 0.0, |
|
"num_tokens": 293569.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 94 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.002356443011286122, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.676545040299143e-07, |
|
"loss": 0.0, |
|
"num_tokens": 296842.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 95 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1103.5, |
|
"completions/mean_terminated_length": 1103.5, |
|
"completions/min_length": 1046.0, |
|
"completions/min_terminated_length": 1046.0, |
|
"epoch": 0.002381247674562818, |
|
"grad_norm": 5.504190444946289, |
|
"learning_rate": 8.636868207865243e-07, |
|
"loss": -0.0261, |
|
"num_tokens": 299897.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 96 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1120.5, |
|
"completions/mean_terminated_length": 1120.5, |
|
"completions/min_length": 1081.0, |
|
"completions/min_terminated_length": 1081.0, |
|
"epoch": 0.002406052337839514, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.596699001693255e-07, |
|
"loss": 0.0, |
|
"num_tokens": 303098.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 97 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.00243085700111621, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.556042860050685e-07, |
|
"loss": 0.0, |
|
"num_tokens": 306485.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 98 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 701.0, |
|
"completions/max_terminated_length": 701.0, |
|
"completions/mean_length": 682.0, |
|
"completions/mean_terminated_length": 682.0, |
|
"completions/min_length": 663.0, |
|
"completions/min_terminated_length": 663.0, |
|
"epoch": 0.0024556616643929058, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.514905287128309e-07, |
|
"loss": 0.0, |
|
"num_tokens": 308721.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 99 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0024804663276696017, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.473291852294986e-07, |
|
"loss": 0.0, |
|
"num_tokens": 312088.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.002505270990946298, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.431208189343669e-07, |
|
"loss": 0.0, |
|
"num_tokens": 315250.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 101 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.002530075654222994, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.388659995728662e-07, |
|
"loss": 0.0, |
|
"num_tokens": 318417.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 102 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.00255488031749969, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.34565303179429e-07, |
|
"loss": 0.0, |
|
"num_tokens": 321599.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 103 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.002579684980776386, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.302193119995038e-07, |
|
"loss": 0.0, |
|
"num_tokens": 324831.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 104 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.002604489644053082, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.258286144107276e-07, |
|
"loss": 0.0, |
|
"num_tokens": 328037.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 105 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1105.5, |
|
"completions/mean_terminated_length": 1105.5, |
|
"completions/min_length": 1050.0, |
|
"completions/min_terminated_length": 1050.0, |
|
"epoch": 0.002629294307329778, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.213938048432696e-07, |
|
"loss": 0.0, |
|
"num_tokens": 331154.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 106 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 786.0, |
|
"completions/max_terminated_length": 786.0, |
|
"completions/mean_length": 660.5, |
|
"completions/mean_terminated_length": 660.5, |
|
"completions/min_length": 535.0, |
|
"completions/min_terminated_length": 535.0, |
|
"epoch": 0.0026540989706064742, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.16915483699355e-07, |
|
"loss": 0.0, |
|
"num_tokens": 333371.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 107 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.00267890363388317, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.123942572719799e-07, |
|
"loss": 0.0, |
|
"num_tokens": 336527.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 108 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.002703708297159866, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.07830737662829e-07, |
|
"loss": 0.0, |
|
"num_tokens": 339687.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 109 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.002728512960436562, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.032255426994068e-07, |
|
"loss": 0.0, |
|
"num_tokens": 342873.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1058.0, |
|
"completions/max_terminated_length": 1058.0, |
|
"completions/mean_length": 1054.5, |
|
"completions/mean_terminated_length": 1054.5, |
|
"completions/min_length": 1051.0, |
|
"completions/min_terminated_length": 1051.0, |
|
"epoch": 0.002753317623713258, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.985792958513931e-07, |
|
"loss": 0.0, |
|
"num_tokens": 345860.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 111 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.002778122286989954, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.938926261462365e-07, |
|
"loss": 0.0, |
|
"num_tokens": 349275.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 112 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.00280292695026665, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.891661680839932e-07, |
|
"loss": 0.0, |
|
"num_tokens": 352521.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 113 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.0028277316135433463, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.844005615514258e-07, |
|
"loss": 0.0, |
|
"num_tokens": 355809.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 114 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1055.0, |
|
"completions/max_terminated_length": 1055.0, |
|
"completions/mean_length": 1055.0, |
|
"completions/mean_terminated_length": 1055.0, |
|
"completions/min_length": 1055.0, |
|
"completions/min_terminated_length": 1055.0, |
|
"epoch": 0.0028525362768200423, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.795964517353733e-07, |
|
"loss": 0.0, |
|
"num_tokens": 358777.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 115 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1098.5, |
|
"completions/mean_terminated_length": 1098.5, |
|
"completions/min_length": 1036.0, |
|
"completions/min_terminated_length": 1036.0, |
|
"epoch": 0.0028773409400967382, |
|
"grad_norm": 4.180582046508789, |
|
"learning_rate": 7.74754489035403e-07, |
|
"loss": 0.0284, |
|
"num_tokens": 361834.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 116 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1124.5, |
|
"completions/mean_terminated_length": 1124.5, |
|
"completions/min_length": 1089.0, |
|
"completions/min_terminated_length": 1089.0, |
|
"epoch": 0.002902145603373434, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.698753289757564e-07, |
|
"loss": 0.0, |
|
"num_tokens": 365245.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 117 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1159.0, |
|
"completions/mean_terminated_length": 1159.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.00292695026665013, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.649596321166024e-07, |
|
"loss": 0.0, |
|
"num_tokens": 368539.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 118 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.002951754929926826, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.600080639646076e-07, |
|
"loss": 0.0, |
|
"num_tokens": 371833.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 119 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 830.0, |
|
"completions/max_terminated_length": 830.0, |
|
"completions/mean_length": 812.5, |
|
"completions/mean_terminated_length": 812.5, |
|
"completions/min_length": 795.0, |
|
"completions/min_terminated_length": 795.0, |
|
"epoch": 0.0029765595932035225, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.550212948828376e-07, |
|
"loss": 0.0, |
|
"num_tokens": 374264.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0030013642564802184, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.5e-07, |
|
"loss": 0.0, |
|
"num_tokens": 377523.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 121 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0030261689197569144, |
|
"grad_norm": 4.116443634033203, |
|
"learning_rate": 7.449448591190434e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 381088.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 122 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 823.0, |
|
"completions/max_terminated_length": 823.0, |
|
"completions/mean_length": 729.5, |
|
"completions/mean_terminated_length": 729.5, |
|
"completions/min_length": 636.0, |
|
"completions/min_terminated_length": 636.0, |
|
"epoch": 0.0030509735830336103, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.398565566251232e-07, |
|
"loss": 0.0, |
|
"num_tokens": 383359.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 123 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1112.5, |
|
"completions/mean_terminated_length": 1112.5, |
|
"completions/min_length": 1064.0, |
|
"completions/min_terminated_length": 1064.0, |
|
"epoch": 0.0030757782463103063, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.347357813929454e-07, |
|
"loss": 0.0, |
|
"num_tokens": 386644.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 124 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 944.0, |
|
"completions/max_terminated_length": 944.0, |
|
"completions/mean_length": 840.0, |
|
"completions/mean_terminated_length": 840.0, |
|
"completions/min_length": 736.0, |
|
"completions/min_terminated_length": 736.0, |
|
"epoch": 0.0031005829095870022, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.295832266935058e-07, |
|
"loss": 0.0, |
|
"num_tokens": 389140.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.003125387572863698, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.243995901002311e-07, |
|
"loss": 0.0, |
|
"num_tokens": 392478.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 126 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 949.0, |
|
"completions/max_terminated_length": 949.0, |
|
"completions/mean_length": 896.0, |
|
"completions/mean_terminated_length": 896.0, |
|
"completions/min_length": 843.0, |
|
"completions/min_terminated_length": 843.0, |
|
"epoch": 0.0031501922361403946, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.191855733945386e-07, |
|
"loss": 0.0, |
|
"num_tokens": 395126.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 127 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1156.5, |
|
"completions/mean_terminated_length": 1156.5, |
|
"completions/min_length": 1154.0, |
|
"completions/min_terminated_length": 1154.0, |
|
"epoch": 0.0031749968994170905, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.139418824708271e-07, |
|
"loss": 0.0, |
|
"num_tokens": 398385.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 128 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0031998015626937865, |
|
"grad_norm": 4.577914237976074, |
|
"learning_rate": 7.086692272409089e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 401619.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 129 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0032246062259704824, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.033683215379002e-07, |
|
"loss": 0.0, |
|
"num_tokens": 404991.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 394.0, |
|
"completions/max_terminated_length": 394.0, |
|
"completions/mean_length": 389.0, |
|
"completions/mean_terminated_length": 389.0, |
|
"completions/min_length": 384.0, |
|
"completions/min_terminated_length": 384.0, |
|
"epoch": 0.0032494108892471784, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.980398830195784e-07, |
|
"loss": 0.0, |
|
"num_tokens": 406647.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 131 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0032742155525238743, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.926846330712241e-07, |
|
"loss": 0.0, |
|
"num_tokens": 410248.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 132 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1139.0, |
|
"completions/mean_terminated_length": 1139.0, |
|
"completions/min_length": 1118.0, |
|
"completions/min_terminated_length": 1118.0, |
|
"epoch": 0.0032990202158005707, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.87303296707956e-07, |
|
"loss": 0.0, |
|
"num_tokens": 413390.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 133 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.0033238248790772667, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.818966024765757e-07, |
|
"loss": 0.0, |
|
"num_tokens": 416560.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 134 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0033486295423539626, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.764652823569343e-07, |
|
"loss": 0.0, |
|
"num_tokens": 419844.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 135 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0033734342056306586, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.710100716628344e-07, |
|
"loss": 0.0, |
|
"num_tokens": 423018.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 136 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0033982388689073545, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.65531708942479e-07, |
|
"loss": 0.0, |
|
"num_tokens": 426257.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 137 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0034230435321840505, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.600309358784857e-07, |
|
"loss": 0.0, |
|
"num_tokens": 429451.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 138 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.0034478481954607464, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.545084971874736e-07, |
|
"loss": 0.0, |
|
"num_tokens": 432661.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 139 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.003472652858737443, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.489651405192409e-07, |
|
"loss": 0.0, |
|
"num_tokens": 435989.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 712.0, |
|
"completions/max_terminated_length": 712.0, |
|
"completions/mean_length": 666.5, |
|
"completions/mean_terminated_length": 666.5, |
|
"completions/min_length": 621.0, |
|
"completions/min_terminated_length": 621.0, |
|
"epoch": 0.0034974575220141388, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.434016163555451e-07, |
|
"loss": 0.0, |
|
"num_tokens": 438226.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 141 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0035222621852908347, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.378186779084995e-07, |
|
"loss": 0.0, |
|
"num_tokens": 441480.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 142 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1101.0, |
|
"completions/mean_terminated_length": 1101.0, |
|
"completions/min_length": 1041.0, |
|
"completions/min_terminated_length": 1041.0, |
|
"epoch": 0.0035470668485675307, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.322170810186011e-07, |
|
"loss": 0.0, |
|
"num_tokens": 444570.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 143 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0035718715118442266, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.265975840524009e-07, |
|
"loss": 0.0, |
|
"num_tokens": 447825.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 144 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.0035966761751209226, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.209609477998338e-07, |
|
"loss": 0.0, |
|
"num_tokens": 451023.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 145 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1136.5, |
|
"completions/mean_terminated_length": 1136.5, |
|
"completions/min_length": 1112.0, |
|
"completions/min_terminated_length": 1112.0, |
|
"epoch": 0.003621480838397619, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.153079353712201e-07, |
|
"loss": 0.0, |
|
"num_tokens": 454258.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 146 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.003646285501674315, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.096393120939516e-07, |
|
"loss": 0.0, |
|
"num_tokens": 457551.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 147 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.003671090164951011, |
|
"grad_norm": 4.276771068572998, |
|
"learning_rate": 6.039558454088795e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 460792.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 148 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 744.0, |
|
"completions/max_terminated_length": 744.0, |
|
"completions/mean_length": 658.0, |
|
"completions/mean_terminated_length": 658.0, |
|
"completions/min_length": 572.0, |
|
"completions/min_terminated_length": 572.0, |
|
"epoch": 0.003695894828227707, |
|
"grad_norm": 5.478691101074219, |
|
"learning_rate": 5.98258304766415e-07, |
|
"loss": 0.0653, |
|
"num_tokens": 462960.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 149 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0037206994915044028, |
|
"grad_norm": 4.9000935554504395, |
|
"learning_rate": 5.925474615223572e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 466229.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 150 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0037455041547810987, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.868240888334652e-07, |
|
"loss": 0.0, |
|
"num_tokens": 469424.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 151 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0037703088180577947, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.810889615527838e-07, |
|
"loss": 0.0, |
|
"num_tokens": 472677.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 152 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1108.0, |
|
"completions/mean_terminated_length": 1108.0, |
|
"completions/min_length": 1055.0, |
|
"completions/min_terminated_length": 1055.0, |
|
"epoch": 0.003795113481334491, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.753428561247415e-07, |
|
"loss": 0.0, |
|
"num_tokens": 475963.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 153 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 396.0, |
|
"completions/max_terminated_length": 396.0, |
|
"completions/mean_length": 395.0, |
|
"completions/mean_terminated_length": 395.0, |
|
"completions/min_length": 394.0, |
|
"completions/min_terminated_length": 394.0, |
|
"epoch": 0.003819918144611187, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.695865504800327e-07, |
|
"loss": 0.0, |
|
"num_tokens": 477633.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 154 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.003844722807887883, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.638208239302974e-07, |
|
"loss": 0.0, |
|
"num_tokens": 480869.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 155 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.003869527471164579, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.580464570626151e-07, |
|
"loss": 0.0, |
|
"num_tokens": 484119.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 156 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1057.0, |
|
"completions/max_terminated_length": 1057.0, |
|
"completions/mean_length": 1056.5, |
|
"completions/mean_terminated_length": 1056.5, |
|
"completions/min_length": 1056.0, |
|
"completions/min_terminated_length": 1056.0, |
|
"epoch": 0.003894332134441275, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.522642316338268e-07, |
|
"loss": 0.0, |
|
"num_tokens": 487110.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 157 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.003919136797717971, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.464749304646961e-07, |
|
"loss": 0.0, |
|
"num_tokens": 490274.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 158 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.003943941460994667, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.406793373339292e-07, |
|
"loss": 0.0, |
|
"num_tokens": 493466.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 159 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1106.5, |
|
"completions/mean_terminated_length": 1106.5, |
|
"completions/min_length": 1052.0, |
|
"completions/min_terminated_length": 1052.0, |
|
"epoch": 0.003968746124271363, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.348782368720625e-07, |
|
"loss": 0.0, |
|
"num_tokens": 496819.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1099.5, |
|
"completions/mean_terminated_length": 1099.5, |
|
"completions/min_length": 1039.0, |
|
"completions/min_terminated_length": 1039.0, |
|
"epoch": 0.003993550787548059, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.290724144552379e-07, |
|
"loss": 0.0, |
|
"num_tokens": 500108.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 161 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1126.5, |
|
"completions/mean_terminated_length": 1126.5, |
|
"completions/min_length": 1092.0, |
|
"completions/min_terminated_length": 1092.0, |
|
"epoch": 0.004018355450824755, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.232626560988734e-07, |
|
"loss": 0.0, |
|
"num_tokens": 503395.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 162 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.004043160114101451, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.174497483512505e-07, |
|
"loss": 0.0, |
|
"num_tokens": 506789.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 163 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.004067964777378147, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.116344781870281e-07, |
|
"loss": 0.0, |
|
"num_tokens": 510067.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 164 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1104.0, |
|
"completions/mean_terminated_length": 1104.0, |
|
"completions/min_length": 1048.0, |
|
"completions/min_terminated_length": 1048.0, |
|
"epoch": 0.004092769440654843, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.058176329006985e-07, |
|
"loss": 0.0, |
|
"num_tokens": 513145.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 165 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.004117574103931539, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"num_tokens": 516324.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 166 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.004142378767208235, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.941823670993015e-07, |
|
"loss": 0.0, |
|
"num_tokens": 519560.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 167 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1134.5, |
|
"completions/mean_terminated_length": 1134.5, |
|
"completions/min_length": 1109.0, |
|
"completions/min_terminated_length": 1109.0, |
|
"epoch": 0.004167183430484931, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.883655218129719e-07, |
|
"loss": 0.0, |
|
"num_tokens": 523121.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 168 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.004191988093761628, |
|
"grad_norm": 3.9390673637390137, |
|
"learning_rate": 4.825502516487496e-07, |
|
"loss": 0.0, |
|
"num_tokens": 526431.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 169 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1099.0, |
|
"completions/mean_terminated_length": 1099.0, |
|
"completions/min_length": 1038.0, |
|
"completions/min_terminated_length": 1038.0, |
|
"epoch": 0.004216792757038323, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.7673734390112666e-07, |
|
"loss": 0.0, |
|
"num_tokens": 529535.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0042415974203150195, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.7092758554476206e-07, |
|
"loss": 0.0, |
|
"num_tokens": 532775.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 171 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.004266402083591715, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.6512176312793735e-07, |
|
"loss": 0.0, |
|
"num_tokens": 536009.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 172 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.004291206746868411, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.593206626660709e-07, |
|
"loss": 0.0, |
|
"num_tokens": 539229.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 173 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1082.0, |
|
"completions/max_terminated_length": 1082.0, |
|
"completions/mean_length": 985.0, |
|
"completions/mean_terminated_length": 985.0, |
|
"completions/min_length": 888.0, |
|
"completions/min_terminated_length": 888.0, |
|
"epoch": 0.004316011410145107, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.535250695353039e-07, |
|
"loss": 0.0, |
|
"num_tokens": 542033.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 174 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.004340816073421803, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.477357683661733e-07, |
|
"loss": 0.0, |
|
"num_tokens": 545369.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0043656207366985, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.419535429373848e-07, |
|
"loss": 0.0, |
|
"num_tokens": 548690.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 176 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 750.0, |
|
"completions/max_terminated_length": 750.0, |
|
"completions/mean_length": 726.0, |
|
"completions/mean_terminated_length": 726.0, |
|
"completions/min_length": 702.0, |
|
"completions/min_terminated_length": 702.0, |
|
"epoch": 0.004390425399975195, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.3617917606970267e-07, |
|
"loss": 0.0, |
|
"num_tokens": 550988.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 177 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1101.5, |
|
"completions/mean_terminated_length": 1101.5, |
|
"completions/min_length": 1042.0, |
|
"completions/min_terminated_length": 1042.0, |
|
"epoch": 0.004415230063251892, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.304134495199674e-07, |
|
"loss": 0.0, |
|
"num_tokens": 554031.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 178 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.004440034726528587, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.246571438752584e-07, |
|
"loss": 0.0, |
|
"num_tokens": 557221.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 179 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1098.5, |
|
"completions/mean_terminated_length": 1098.5, |
|
"completions/min_length": 1036.0, |
|
"completions/min_terminated_length": 1036.0, |
|
"epoch": 0.0044648393898052835, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.1891103844721634e-07, |
|
"loss": 0.0, |
|
"num_tokens": 560388.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.004489644053081979, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.131759111665348e-07, |
|
"loss": 0.0, |
|
"num_tokens": 563599.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 181 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.004514448716358675, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.074525384776428e-07, |
|
"loss": 0.0, |
|
"num_tokens": 566805.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 182 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.004539253379635372, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.0174169523358485e-07, |
|
"loss": 0.0, |
|
"num_tokens": 570332.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 183 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1133.5, |
|
"completions/mean_terminated_length": 1133.5, |
|
"completions/min_length": 1106.0, |
|
"completions/min_terminated_length": 1106.0, |
|
"epoch": 0.004564058042912067, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.960441545911204e-07, |
|
"loss": 0.0, |
|
"num_tokens": 573507.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 184 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.004588862706188764, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.9036068790604823e-07, |
|
"loss": 0.0, |
|
"num_tokens": 576655.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 185 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.004613667369465459, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.846920646287799e-07, |
|
"loss": 0.0, |
|
"num_tokens": 579969.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.004638472032742156, |
|
"grad_norm": 4.225953578948975, |
|
"learning_rate": 3.790390522001662e-07, |
|
"loss": 0.0, |
|
"num_tokens": 583149.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 187 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1023.0, |
|
"completions/max_terminated_length": 1023.0, |
|
"completions/mean_length": 972.5, |
|
"completions/mean_terminated_length": 972.5, |
|
"completions/min_length": 922.0, |
|
"completions/min_terminated_length": 922.0, |
|
"epoch": 0.004663276696018851, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.734024159475991e-07, |
|
"loss": 0.0, |
|
"num_tokens": 586000.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 188 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1100.5, |
|
"completions/mean_terminated_length": 1100.5, |
|
"completions/min_length": 1041.0, |
|
"completions/min_terminated_length": 1041.0, |
|
"epoch": 0.0046880813592955475, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.6778291898139903e-07, |
|
"loss": 0.0, |
|
"num_tokens": 589111.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 189 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1143.0, |
|
"completions/max_terminated_length": 1143.0, |
|
"completions/mean_length": 1094.0, |
|
"completions/mean_terminated_length": 1094.0, |
|
"completions/min_length": 1045.0, |
|
"completions/min_terminated_length": 1045.0, |
|
"epoch": 0.004712886022572244, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.621813220915004e-07, |
|
"loss": 0.0, |
|
"num_tokens": 592191.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.004737690685848939, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.56598383644455e-07, |
|
"loss": 0.0, |
|
"num_tokens": 595476.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 191 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.004762495349125636, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.5103485948075894e-07, |
|
"loss": 0.0, |
|
"num_tokens": 598740.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 192 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.004787300012402331, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.454915028125263e-07, |
|
"loss": 0.0, |
|
"num_tokens": 602011.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 193 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 599.0, |
|
"completions/max_terminated_length": 599.0, |
|
"completions/mean_length": 596.5, |
|
"completions/mean_terminated_length": 596.5, |
|
"completions/min_length": 594.0, |
|
"completions/min_terminated_length": 594.0, |
|
"epoch": 0.004812104675679028, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.3996906412151417e-07, |
|
"loss": 0.0, |
|
"num_tokens": 604050.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 194 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.004836909338955724, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.34468291057521e-07, |
|
"loss": 0.0, |
|
"num_tokens": 607196.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 195 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 643.0, |
|
"completions/max_terminated_length": 643.0, |
|
"completions/mean_length": 597.0, |
|
"completions/mean_terminated_length": 597.0, |
|
"completions/min_length": 551.0, |
|
"completions/min_terminated_length": 551.0, |
|
"epoch": 0.00486171400223242, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.2898992833716563e-07, |
|
"loss": 0.0, |
|
"num_tokens": 609278.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 196 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1106.5, |
|
"completions/mean_terminated_length": 1106.5, |
|
"completions/min_length": 1052.0, |
|
"completions/min_terminated_length": 1052.0, |
|
"epoch": 0.004886518665509116, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.235347176430656e-07, |
|
"loss": 0.0, |
|
"num_tokens": 612401.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 197 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0049113233287858115, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.181033975234244e-07, |
|
"loss": 0.0, |
|
"num_tokens": 615635.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 198 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 991.0, |
|
"completions/max_terminated_length": 991.0, |
|
"completions/mean_length": 854.0, |
|
"completions/mean_terminated_length": 854.0, |
|
"completions/min_length": 717.0, |
|
"completions/min_terminated_length": 717.0, |
|
"epoch": 0.004936127992062508, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.1269670329204393e-07, |
|
"loss": 0.0, |
|
"num_tokens": 618143.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 199 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.004960932655339203, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.073153669287759e-07, |
|
"loss": 0.0, |
|
"num_tokens": 621340.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 767.0, |
|
"completions/max_terminated_length": 767.0, |
|
"completions/mean_length": 694.5, |
|
"completions/mean_terminated_length": 694.5, |
|
"completions/min_length": 622.0, |
|
"completions/min_terminated_length": 622.0, |
|
"epoch": 0.0049857373186159, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.0196011698042156e-07, |
|
"loss": 0.0, |
|
"num_tokens": 623615.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 201 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1100.0, |
|
"completions/mean_terminated_length": 1100.0, |
|
"completions/min_length": 1040.0, |
|
"completions/min_terminated_length": 1040.0, |
|
"epoch": 0.005010541981892596, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.9663167846209996e-07, |
|
"loss": 0.0, |
|
"num_tokens": 626681.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 202 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1143.5, |
|
"completions/mean_terminated_length": 1143.5, |
|
"completions/min_length": 1126.0, |
|
"completions/min_terminated_length": 1126.0, |
|
"epoch": 0.005035346645169292, |
|
"grad_norm": 3.8946311473846436, |
|
"learning_rate": 2.9133077275909107e-07, |
|
"loss": 0.0077, |
|
"num_tokens": 629804.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 203 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 194.0, |
|
"completions/max_terminated_length": 194.0, |
|
"completions/mean_length": 193.5, |
|
"completions/mean_terminated_length": 193.5, |
|
"completions/min_length": 193.0, |
|
"completions/min_terminated_length": 193.0, |
|
"epoch": 0.005060151308445988, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.86058117529173e-07, |
|
"loss": 0.0, |
|
"num_tokens": 630997.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 204 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.005084955971722684, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.808144266054612e-07, |
|
"loss": 0.0, |
|
"num_tokens": 634166.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 205 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.00510976063499938, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.756004098997689e-07, |
|
"loss": 0.0, |
|
"num_tokens": 637409.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 206 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0051345652982760755, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.7041677330649406e-07, |
|
"loss": 0.0, |
|
"num_tokens": 640584.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 207 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.005159369961552772, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6526421860705473e-07, |
|
"loss": 0.0, |
|
"num_tokens": 643958.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 208 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.005184174624829468, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6014344337487703e-07, |
|
"loss": 0.0, |
|
"num_tokens": 647203.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 209 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.005208979288106164, |
|
"grad_norm": 4.012828826904297, |
|
"learning_rate": 2.550551408809565e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 650446.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 210 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1143.0, |
|
"completions/mean_terminated_length": 1143.0, |
|
"completions/min_length": 1126.0, |
|
"completions/min_terminated_length": 1126.0, |
|
"epoch": 0.00523378395138286, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.500000000000001e-07, |
|
"loss": 0.0, |
|
"num_tokens": 653658.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 211 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.005258588614659556, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4497870511716237e-07, |
|
"loss": 0.0, |
|
"num_tokens": 656809.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 212 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1098.0, |
|
"completions/max_terminated_length": 1098.0, |
|
"completions/mean_length": 1096.0, |
|
"completions/mean_terminated_length": 1096.0, |
|
"completions/min_length": 1094.0, |
|
"completions/min_terminated_length": 1094.0, |
|
"epoch": 0.005283393277936252, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.399919360353923e-07, |
|
"loss": 0.0, |
|
"num_tokens": 660059.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 213 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.0053081979412129485, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.350403678833976e-07, |
|
"loss": 0.0, |
|
"num_tokens": 663917.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 214 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1127.0, |
|
"completions/mean_terminated_length": 1127.0, |
|
"completions/min_length": 1093.0, |
|
"completions/min_terminated_length": 1093.0, |
|
"epoch": 0.005333002604489644, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.301246710242437e-07, |
|
"loss": 0.0, |
|
"num_tokens": 667155.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 215 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1073.0, |
|
"completions/max_terminated_length": 1073.0, |
|
"completions/mean_length": 1072.0, |
|
"completions/mean_terminated_length": 1072.0, |
|
"completions/min_length": 1071.0, |
|
"completions/min_terminated_length": 1071.0, |
|
"epoch": 0.00535780726776634, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.25245510964597e-07, |
|
"loss": 0.0, |
|
"num_tokens": 670323.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 216 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.005382611931043036, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2040354826462664e-07, |
|
"loss": 0.0, |
|
"num_tokens": 673512.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 217 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.005407416594319732, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.155994384485742e-07, |
|
"loss": 0.0, |
|
"num_tokens": 676684.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 218 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.005432221257596428, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1083383191600672e-07, |
|
"loss": 0.0, |
|
"num_tokens": 679975.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 219 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.005457025920873124, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0610737385376348e-07, |
|
"loss": 0.0, |
|
"num_tokens": 683161.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.0054818305841498206, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.01420704148607e-07, |
|
"loss": 0.0, |
|
"num_tokens": 686346.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 221 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 953.0, |
|
"completions/max_terminated_length": 953.0, |
|
"completions/mean_length": 889.5, |
|
"completions/mean_terminated_length": 889.5, |
|
"completions/min_length": 826.0, |
|
"completions/min_terminated_length": 826.0, |
|
"epoch": 0.005506635247426516, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9677445730059344e-07, |
|
"loss": 0.0, |
|
"num_tokens": 688939.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 222 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 466.0, |
|
"completions/max_terminated_length": 466.0, |
|
"completions/mean_length": 454.0, |
|
"completions/mean_terminated_length": 454.0, |
|
"completions/min_length": 442.0, |
|
"completions/min_terminated_length": 442.0, |
|
"epoch": 0.0055314399107032125, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.9216926233717084e-07, |
|
"loss": 0.0, |
|
"num_tokens": 690653.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 223 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.005556244573979908, |
|
"grad_norm": 4.9939961433410645, |
|
"learning_rate": 1.8760574272801998e-07, |
|
"loss": 0.0, |
|
"num_tokens": 693923.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 224 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 646.0, |
|
"completions/max_terminated_length": 646.0, |
|
"completions/mean_length": 591.5, |
|
"completions/mean_terminated_length": 591.5, |
|
"completions/min_length": 537.0, |
|
"completions/min_terminated_length": 537.0, |
|
"epoch": 0.005581049237256604, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.830845163006448e-07, |
|
"loss": 0.0, |
|
"num_tokens": 695956.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1113.5, |
|
"completions/mean_terminated_length": 1113.5, |
|
"completions/min_length": 1067.0, |
|
"completions/min_terminated_length": 1067.0, |
|
"epoch": 0.0056058539005333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7860619515673032e-07, |
|
"loss": 0.0, |
|
"num_tokens": 699187.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 226 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.005630658563809996, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.741713855892724e-07, |
|
"loss": 0.0, |
|
"num_tokens": 702425.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 227 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.005655463227086693, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.697806880004962e-07, |
|
"loss": 0.0, |
|
"num_tokens": 705813.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 228 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1111.0, |
|
"completions/max_terminated_length": 1111.0, |
|
"completions/mean_length": 1075.5, |
|
"completions/mean_terminated_length": 1075.5, |
|
"completions/min_length": 1040.0, |
|
"completions/min_terminated_length": 1040.0, |
|
"epoch": 0.005680267890363388, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6543469682057104e-07, |
|
"loss": 0.0, |
|
"num_tokens": 708784.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 229 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 570.0, |
|
"completions/max_terminated_length": 570.0, |
|
"completions/mean_length": 554.5, |
|
"completions/mean_terminated_length": 554.5, |
|
"completions/min_length": 539.0, |
|
"completions/min_terminated_length": 539.0, |
|
"epoch": 0.005705072553640085, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6113400042713388e-07, |
|
"loss": 0.0, |
|
"num_tokens": 710775.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1159.0, |
|
"completions/mean_terminated_length": 1159.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.00572987721691678, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5687918106563325e-07, |
|
"loss": 0.0, |
|
"num_tokens": 714253.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 231 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0057546818801934765, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5267081477050131e-07, |
|
"loss": 0.0, |
|
"num_tokens": 717617.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 232 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.005779486543470172, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4850947128716911e-07, |
|
"loss": 0.0, |
|
"num_tokens": 720945.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 233 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.005804291206746868, |
|
"grad_norm": 6.170485496520996, |
|
"learning_rate": 1.4439571399493145e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 724198.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 234 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.005829095870023565, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4033009983067452e-07, |
|
"loss": 0.0, |
|
"num_tokens": 727450.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 235 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.00585390053330026, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3631317921347562e-07, |
|
"loss": 0.0, |
|
"num_tokens": 731058.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 236 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1104.0, |
|
"completions/mean_terminated_length": 1104.0, |
|
"completions/min_length": 1049.0, |
|
"completions/min_terminated_length": 1049.0, |
|
"epoch": 0.005878705196576957, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3234549597008572e-07, |
|
"loss": 0.0, |
|
"num_tokens": 734192.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 237 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1103.0, |
|
"completions/mean_terminated_length": 1103.0, |
|
"completions/min_length": 1046.0, |
|
"completions/min_terminated_length": 1046.0, |
|
"epoch": 0.005903509859853652, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.284275872613028e-07, |
|
"loss": 0.0, |
|
"num_tokens": 737280.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 238 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1130.0, |
|
"completions/mean_terminated_length": 1130.0, |
|
"completions/min_length": 1101.0, |
|
"completions/min_terminated_length": 1101.0, |
|
"epoch": 0.005928314523130349, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.245599835092504e-07, |
|
"loss": 0.0, |
|
"num_tokens": 740502.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 239 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.005953119186407045, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.2074320832556556e-07, |
|
"loss": 0.0, |
|
"num_tokens": 743802.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 591.0, |
|
"completions/max_terminated_length": 591.0, |
|
"completions/mean_length": 545.0, |
|
"completions/mean_terminated_length": 545.0, |
|
"completions/min_length": 499.0, |
|
"completions/min_terminated_length": 499.0, |
|
"epoch": 0.0059779238496837405, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1697777844051104e-07, |
|
"loss": 0.0, |
|
"num_tokens": 745740.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 241 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1053.0, |
|
"completions/max_terminated_length": 1053.0, |
|
"completions/mean_length": 1051.5, |
|
"completions/mean_terminated_length": 1051.5, |
|
"completions/min_length": 1050.0, |
|
"completions/min_terminated_length": 1050.0, |
|
"epoch": 0.006002728512960437, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.1326420363301808e-07, |
|
"loss": 0.0, |
|
"num_tokens": 749029.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 242 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1157.0, |
|
"completions/mean_terminated_length": 1157.0, |
|
"completions/min_length": 1154.0, |
|
"completions/min_terminated_length": 1154.0, |
|
"epoch": 0.006027533176237132, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.096029866616704e-07, |
|
"loss": 0.0, |
|
"num_tokens": 752345.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 243 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.006052337839513829, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0599462319663904e-07, |
|
"loss": 0.0, |
|
"num_tokens": 755507.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 244 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006077142502790524, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0243960175257605e-07, |
|
"loss": 0.0, |
|
"num_tokens": 759195.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 245 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.006101947166067221, |
|
"grad_norm": 4.425199508666992, |
|
"learning_rate": 9.893840362247807e-08, |
|
"loss": 0.0, |
|
"num_tokens": 762363.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 246 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006126751829343917, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.549150281252632e-08, |
|
"loss": 0.0, |
|
"num_tokens": 765694.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 247 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.006151556492620613, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.209936597791407e-08, |
|
"loss": 0.0, |
|
"num_tokens": 768958.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 248 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006176361155897309, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.876245235966883e-08, |
|
"loss": 0.0, |
|
"num_tokens": 772163.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 249 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1145.5, |
|
"completions/mean_terminated_length": 1145.5, |
|
"completions/min_length": 1131.0, |
|
"completions/min_terminated_length": 1131.0, |
|
"epoch": 0.0062011658191740045, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.548121372247919e-08, |
|
"loss": 0.0, |
|
"num_tokens": 775316.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1127.5, |
|
"completions/mean_terminated_length": 1127.5, |
|
"completions/min_length": 1094.0, |
|
"completions/min_terminated_length": 1094.0, |
|
"epoch": 0.006225970482450701, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.225609429353186e-08, |
|
"loss": 0.0, |
|
"num_tokens": 778405.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 251 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006250775145727396, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.908753070237124e-08, |
|
"loss": 0.0, |
|
"num_tokens": 781653.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 252 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1103.0, |
|
"completions/mean_terminated_length": 1103.0, |
|
"completions/min_length": 1045.0, |
|
"completions/min_terminated_length": 1045.0, |
|
"epoch": 0.006275579809004093, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.597595192178702e-08, |
|
"loss": 0.0, |
|
"num_tokens": 784733.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 253 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1133.5, |
|
"completions/mean_terminated_length": 1133.5, |
|
"completions/min_length": 1107.0, |
|
"completions/min_terminated_length": 1107.0, |
|
"epoch": 0.006300384472280789, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.292177920973724e-08, |
|
"loss": 0.0, |
|
"num_tokens": 787840.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 254 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1131.0, |
|
"completions/mean_terminated_length": 1131.0, |
|
"completions/min_length": 1102.0, |
|
"completions/min_terminated_length": 1102.0, |
|
"epoch": 0.006325189135557485, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.992542605231738e-08, |
|
"loss": 0.0, |
|
"num_tokens": 791044.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 255 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006349993798834181, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.698729810778064e-08, |
|
"loss": 0.0, |
|
"num_tokens": 794386.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 256 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006374798462110877, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.410779315161886e-08, |
|
"loss": 0.0, |
|
"num_tokens": 797689.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 257 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.006399603125387573, |
|
"grad_norm": 7.163500785827637, |
|
"learning_rate": 6.128730102270896e-08, |
|
"loss": 0.0004, |
|
"num_tokens": 800989.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 258 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1070.0, |
|
"completions/max_terminated_length": 1070.0, |
|
"completions/mean_length": 1055.0, |
|
"completions/mean_terminated_length": 1055.0, |
|
"completions/min_length": 1040.0, |
|
"completions/min_terminated_length": 1040.0, |
|
"epoch": 0.0064244077886642685, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.8526203570536504e-08, |
|
"loss": 0.0, |
|
"num_tokens": 803933.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 259 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006449212451940965, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.582487460349805e-08, |
|
"loss": 0.0, |
|
"num_tokens": 807138.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.006474017115217661, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.318367983829392e-08, |
|
"loss": 0.0, |
|
"num_tokens": 810345.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 261 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1039.0, |
|
"completions/max_terminated_length": 1039.0, |
|
"completions/mean_length": 913.0, |
|
"completions/mean_terminated_length": 913.0, |
|
"completions/min_length": 787.0, |
|
"completions/min_terminated_length": 787.0, |
|
"epoch": 0.006498821778494357, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.060297685041659e-08, |
|
"loss": 0.0, |
|
"num_tokens": 813127.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 262 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1099.0, |
|
"completions/mean_terminated_length": 1099.0, |
|
"completions/min_length": 1037.0, |
|
"completions/min_terminated_length": 1037.0, |
|
"epoch": 0.006523626441771053, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.808311502573975e-08, |
|
"loss": 0.0, |
|
"num_tokens": 816265.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 263 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1162.0, |
|
"completions/max_terminated_length": 1162.0, |
|
"completions/mean_length": 1161.5, |
|
"completions/mean_terminated_length": 1161.5, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.006548431105047749, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.5624435513217873e-08, |
|
"loss": 0.0, |
|
"num_tokens": 819548.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 264 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006573235768324445, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.322727117869951e-08, |
|
"loss": 0.0, |
|
"num_tokens": 822761.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 265 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.0065980404316011414, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.089194655986306e-08, |
|
"loss": 0.0, |
|
"num_tokens": 826049.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 266 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.006622845094877837, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.861877782227885e-08, |
|
"loss": 0.0, |
|
"num_tokens": 829275.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 267 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006647649758154533, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.6408072716606345e-08, |
|
"loss": 0.0, |
|
"num_tokens": 832512.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 268 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.006672454421431229, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.426013053692878e-08, |
|
"loss": 0.0, |
|
"num_tokens": 835716.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 269 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1115.0, |
|
"completions/mean_terminated_length": 1115.0, |
|
"completions/min_length": 1071.0, |
|
"completions/min_terminated_length": 1071.0, |
|
"epoch": 0.006697259084707925, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.217524208023431e-08, |
|
"loss": 0.0, |
|
"num_tokens": 838788.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.006722063747984621, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.015368960704584e-08, |
|
"loss": 0.0, |
|
"num_tokens": 842011.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 271 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 880.0, |
|
"completions/max_terminated_length": 880.0, |
|
"completions/mean_length": 832.0, |
|
"completions/mean_terminated_length": 832.0, |
|
"completions/min_length": 784.0, |
|
"completions/min_terminated_length": 784.0, |
|
"epoch": 0.006746868411261317, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.8195746803208242e-08, |
|
"loss": 0.0, |
|
"num_tokens": 844527.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 272 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1156.0, |
|
"completions/mean_terminated_length": 1156.0, |
|
"completions/min_length": 1152.0, |
|
"completions/min_terminated_length": 1152.0, |
|
"epoch": 0.0067716730745380135, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.6301678742835397e-08, |
|
"loss": 0.0, |
|
"num_tokens": 847757.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 273 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1102.0, |
|
"completions/mean_terminated_length": 1102.0, |
|
"completions/min_length": 1044.0, |
|
"completions/min_terminated_length": 1044.0, |
|
"epoch": 0.006796477737814709, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.4471741852423233e-08, |
|
"loss": 0.0, |
|
"num_tokens": 850983.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 274 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1099.0, |
|
"completions/max_terminated_length": 1099.0, |
|
"completions/mean_length": 1043.0, |
|
"completions/mean_terminated_length": 1043.0, |
|
"completions/min_length": 987.0, |
|
"completions/min_terminated_length": 987.0, |
|
"epoch": 0.0068212824010914054, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.2706183876134045e-08, |
|
"loss": 0.0, |
|
"num_tokens": 853931.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006846087064368101, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.100524384225555e-08, |
|
"loss": 0.0, |
|
"num_tokens": 857299.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 276 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006870891727644797, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.936915203084055e-08, |
|
"loss": 0.0, |
|
"num_tokens": 860655.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 277 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1109.0, |
|
"completions/mean_terminated_length": 1109.0, |
|
"completions/min_length": 1057.0, |
|
"completions/min_terminated_length": 1057.0, |
|
"epoch": 0.006895696390921493, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.7798129942530548e-08, |
|
"loss": 0.0, |
|
"num_tokens": 863867.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 278 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1102.5, |
|
"completions/mean_terminated_length": 1102.5, |
|
"completions/min_length": 1046.0, |
|
"completions/min_terminated_length": 1046.0, |
|
"epoch": 0.006920501054198189, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.62923902685681e-08, |
|
"loss": 0.0, |
|
"num_tokens": 867012.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 279 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.006945305717474886, |
|
"grad_norm": 4.35086727142334, |
|
"learning_rate": 1.4852136862001763e-08, |
|
"loss": 0.0, |
|
"num_tokens": 870196.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 280 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1142.5, |
|
"completions/mean_terminated_length": 1142.5, |
|
"completions/min_length": 1124.0, |
|
"completions/min_terminated_length": 1124.0, |
|
"epoch": 0.006970110380751581, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3477564710088096e-08, |
|
"loss": 0.0, |
|
"num_tokens": 873489.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 281 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.0069949150440282775, |
|
"grad_norm": 5.316676616668701, |
|
"learning_rate": 1.2168859907892902e-08, |
|
"loss": 0.0, |
|
"num_tokens": 876709.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 282 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1107.0, |
|
"completions/mean_terminated_length": 1107.0, |
|
"completions/min_length": 1054.0, |
|
"completions/min_terminated_length": 1054.0, |
|
"epoch": 0.007019719707304973, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.0926199633097154e-08, |
|
"loss": 0.0, |
|
"num_tokens": 879787.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 283 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1160.0, |
|
"completions/mean_terminated_length": 1160.0, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0070445243705816694, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.749752122010346e-09, |
|
"loss": 0.0, |
|
"num_tokens": 883013.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 284 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.007069329033858365, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.639676646793382e-09, |
|
"loss": 0.0, |
|
"num_tokens": 886160.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 285 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1159.0, |
|
"completions/max_terminated_length": 1159.0, |
|
"completions/mean_length": 1089.0, |
|
"completions/mean_terminated_length": 1089.0, |
|
"completions/min_length": 1019.0, |
|
"completions/min_terminated_length": 1019.0, |
|
"epoch": 0.007094133697135061, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.59612349389599e-09, |
|
"loss": 0.0, |
|
"num_tokens": 889182.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 286 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.007118938360411758, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.61923394371039e-09, |
|
"loss": 0.0, |
|
"num_tokens": 892463.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 287 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1159.5, |
|
"completions/mean_terminated_length": 1159.5, |
|
"completions/min_length": 1159.0, |
|
"completions/min_terminated_length": 1159.0, |
|
"epoch": 0.007143743023688453, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.7091402514442e-09, |
|
"loss": 0.0, |
|
"num_tokens": 895656.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 288 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 727.0, |
|
"completions/max_terminated_length": 727.0, |
|
"completions/mean_length": 661.5, |
|
"completions/mean_terminated_length": 661.5, |
|
"completions/min_length": 596.0, |
|
"completions/min_terminated_length": 596.0, |
|
"epoch": 0.00716854768696515, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.865965629214819e-09, |
|
"loss": 0.0, |
|
"num_tokens": 897853.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 289 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 590.0, |
|
"completions/max_terminated_length": 590.0, |
|
"completions/mean_length": 577.5, |
|
"completions/mean_terminated_length": 577.5, |
|
"completions/min_length": 565.0, |
|
"completions/min_terminated_length": 565.0, |
|
"epoch": 0.007193352350241845, |
|
"grad_norm": 9.068865776062012, |
|
"learning_rate": 4.089824229369154e-09, |
|
"loss": -0.0108, |
|
"num_tokens": 899930.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 290 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1163.0, |
|
"completions/max_terminated_length": 1163.0, |
|
"completions/mean_length": 1161.5, |
|
"completions/mean_terminated_length": 1161.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.0072181570135185415, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.380821129028488e-09, |
|
"loss": 0.0, |
|
"num_tokens": 903191.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 291 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1161.0, |
|
"completions/mean_terminated_length": 1161.0, |
|
"completions/min_length": 1161.0, |
|
"completions/min_terminated_length": 1161.0, |
|
"epoch": 0.007242961676795238, |
|
"grad_norm": 4.983455181121826, |
|
"learning_rate": 2.739052315863355e-09, |
|
"loss": 0.0, |
|
"num_tokens": 906377.0, |
|
"reward": 0.5, |
|
"reward_std": 0.7071067690849304, |
|
"rewards/accuracy_reward/mean": 0.5, |
|
"rewards/accuracy_reward/std": 0.7071067690849304, |
|
"step": 292 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1160.0, |
|
"completions/max_terminated_length": 1160.0, |
|
"completions/mean_length": 1047.0, |
|
"completions/mean_terminated_length": 1047.0, |
|
"completions/min_length": 934.0, |
|
"completions/min_terminated_length": 934.0, |
|
"epoch": 0.0072677663400719334, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.1646046750978253e-09, |
|
"loss": 0.0, |
|
"num_tokens": 909287.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 293 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1156.5, |
|
"completions/mean_terminated_length": 1156.5, |
|
"completions/min_length": 1152.0, |
|
"completions/min_terminated_length": 1152.0, |
|
"epoch": 0.00729257100334863, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.6575559777469717e-09, |
|
"loss": 0.0, |
|
"num_tokens": 912768.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 294 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1104.0, |
|
"completions/mean_terminated_length": 1104.0, |
|
"completions/min_length": 1047.0, |
|
"completions/min_terminated_length": 1047.0, |
|
"epoch": 0.007317375666625325, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.217974870087901e-09, |
|
"loss": 0.0, |
|
"num_tokens": 915912.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 295 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.007342180329902022, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.459208643659121e-10, |
|
"loss": 0.0, |
|
"num_tokens": 919111.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 296 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1061.0, |
|
"completions/max_terminated_length": 1061.0, |
|
"completions/mean_length": 1060.5, |
|
"completions/mean_terminated_length": 1060.5, |
|
"completions/min_length": 1060.0, |
|
"completions/min_terminated_length": 1060.0, |
|
"epoch": 0.007366984993178717, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.41444330737717e-10, |
|
"loss": 0.0, |
|
"num_tokens": 922056.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 297 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.007391789656455414, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.0458649045211894e-10, |
|
"loss": 0.0, |
|
"num_tokens": 925347.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 298 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1160.5, |
|
"completions/mean_terminated_length": 1160.5, |
|
"completions/min_length": 1160.0, |
|
"completions/min_terminated_length": 1160.0, |
|
"epoch": 0.00741659431973211, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.3537941026914302e-10, |
|
"loss": 0.0, |
|
"num_tokens": 928612.0, |
|
"reward": 0.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 0.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 299 |
|
}, |
|
{ |
|
"clip_ratio/high_max": 0.0, |
|
"clip_ratio/high_mean": 0.0, |
|
"clip_ratio/low_mean": 0.0, |
|
"clip_ratio/low_min": 0.0, |
|
"clip_ratio/region_mean": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1161.0, |
|
"completions/max_terminated_length": 1161.0, |
|
"completions/mean_length": 1102.0, |
|
"completions/mean_terminated_length": 1102.0, |
|
"completions/min_length": 1043.0, |
|
"completions/min_terminated_length": 1043.0, |
|
"epoch": 0.0074413989830088055, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.3845998118897657e-11, |
|
"loss": 0.0, |
|
"num_tokens": 931688.0, |
|
"reward": 1.0, |
|
"reward_std": 0.0, |
|
"rewards/accuracy_reward/mean": 1.0, |
|
"rewards/accuracy_reward/std": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0074413989830088055, |
|
"step": 300, |
|
"total_flos": 0.0, |
|
"train_loss": 0.00037381448991557895, |
|
"train_runtime": 2088.7955, |
|
"train_samples_per_second": 0.287, |
|
"train_steps_per_second": 0.144 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 931688, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|