{ "best_global_step": 75, "best_metric": -0.0067981877364218235, "best_model_checkpoint": "outputs/microsoft/Phi-3.5-mini-instruct/countdown/checkpoint-75", "epoch": 3.0, "eval_steps": 5, "global_step": 75, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "grad_norm": 1.5520595297857653e-06, "learning_rate": 9.466666666666666e-07, "loss": 0.0, "step": 5 }, { "epoch": 0.2, "eval_clip_ratio": 0.0, "eval_completion_length": 250.575, "eval_kl": 3.934502601623535e-05, "eval_loss": 0.015041607432067394, "eval_num_tokens": 31688.0, "eval_reward": 0.05, "eval_reward_std": 0.1, "eval_rewards/equation_reward_func": 0.05, "eval_rewards/format_reward_func": 0.0, "eval_runtime": 283.7484, "eval_samples_per_second": 0.07, "eval_steps_per_second": 0.018, "step": 5 }, { "epoch": 0.4, "grad_norm": 0.023073066025972366, "learning_rate": 8.799999999999999e-07, "loss": 0.0033, "step": 10 }, { "epoch": 0.4, "eval_clip_ratio": 0.0, "eval_completion_length": 252.7875, "eval_kl": 3.967881202697754e-05, "eval_loss": -0.00017719810421112925, "eval_num_tokens": 62987.0, "eval_reward": 0.0375, "eval_reward_std": 0.075, "eval_rewards/equation_reward_func": 0.0375, "eval_rewards/format_reward_func": 0.0, "eval_runtime": 282.9691, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 10 }, { "epoch": 0.6, "grad_norm": 1.7865870631794678e-06, "learning_rate": 8.133333333333333e-07, "loss": 0.0, "step": 15 }, { "epoch": 0.6, "eval_clip_ratio": 0.0, "eval_completion_length": 248.875, "eval_kl": 4.363656044006348e-05, "eval_loss": 1.7436344705856754e-06, "eval_num_tokens": 94591.0, "eval_reward": 0.025, "eval_reward_std": 0.05, "eval_rewards/equation_reward_func": 0.025, "eval_rewards/format_reward_func": 0.0, "eval_runtime": 282.3086, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 15 }, { "epoch": 0.8, "grad_norm": 0.0, "learning_rate": 7.466666666666667e-07, "loss": 0.0138, "step": 20 }, { "epoch": 0.8, "eval_clip_ratio": 0.0, "eval_completion_length": 253.1875, "eval_kl": 3.9270520210266115e-05, "eval_loss": -0.0007528069545514882, "eval_num_tokens": 125922.0, "eval_reward": 0.0125, "eval_reward_std": 0.025, "eval_rewards/equation_reward_func": 0.0125, "eval_rewards/format_reward_func": 0.0, "eval_runtime": 282.3999, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 20 }, { "epoch": 1.0, "grad_norm": 0.02649177424609661, "learning_rate": 6.800000000000001e-07, "loss": 0.0059, "step": 25 }, { "epoch": 1.0, "eval_clip_ratio": 0.0, "eval_completion_length": 252.9875, "eval_kl": 3.8030743598937986e-05, "eval_loss": 0.006908929906785488, "eval_num_tokens": 157014.0, "eval_reward": 0.075, "eval_reward_std": 0.12886751294136048, "eval_rewards/equation_reward_func": 0.075, "eval_rewards/format_reward_func": 0.0, "eval_runtime": 282.4923, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 25 }, { "epoch": 1.2, "grad_norm": 0.017524780705571175, "learning_rate": 6.133333333333332e-07, "loss": 0.0098, "step": 30 }, { "epoch": 1.2, "eval_clip_ratio": 0.0, "eval_completion_length": 251.6625, "eval_kl": 3.592073917388916e-05, "eval_loss": 0.0063271126709878445, "eval_num_tokens": 188533.0, "eval_reward": 0.0375, "eval_reward_std": 0.075, "eval_rewards/equation_reward_func": 0.025, "eval_rewards/format_reward_func": 0.0125, "eval_runtime": 283.0566, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 30 }, { "epoch": 1.4, "grad_norm": 1.993278601730708e-06, "learning_rate": 5.466666666666666e-07, "loss": 0.0024, "step": 35 }, { "epoch": 1.4, "eval_clip_ratio": 0.0, "eval_completion_length": 253.1, "eval_kl": 4.151761531829834e-05, "eval_loss": 0.009441868402063847, "eval_num_tokens": 219791.0, "eval_reward": 0.05, "eval_reward_std": 0.07886751294136048, "eval_rewards/equation_reward_func": 0.0375, "eval_rewards/format_reward_func": 0.0125, "eval_runtime": 282.8829, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 35 }, { "epoch": 1.6, "grad_norm": 5.774197120445024e-07, "learning_rate": 4.8e-07, "loss": 0.0106, "step": 40 }, { "epoch": 1.6, "eval_clip_ratio": 0.0, "eval_completion_length": 250.6375, "eval_kl": 4.119873046875e-05, "eval_loss": 1.6474120911880163e-06, "eval_num_tokens": 251168.0, "eval_reward": 0.0, "eval_reward_std": 0.0, "eval_rewards/equation_reward_func": 0.0, "eval_rewards/format_reward_func": 0.0, "eval_runtime": 282.7407, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 40 }, { "epoch": 1.8, "grad_norm": 4.944530473949271e-07, "learning_rate": 4.1333333333333333e-07, "loss": 0.0019, "step": 45 }, { "epoch": 1.8, "eval_clip_ratio": 0.0, "eval_completion_length": 250.8, "eval_kl": 3.807544708251953e-05, "eval_loss": 0.019552746787667274, "eval_num_tokens": 282579.0, "eval_reward": 0.05, "eval_reward_std": 0.07886751294136048, "eval_rewards/equation_reward_func": 0.0375, "eval_rewards/format_reward_func": 0.0125, "eval_runtime": 282.3595, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 45 }, { "epoch": 2.0, "grad_norm": 0.0208375733345747, "learning_rate": 3.4666666666666665e-07, "loss": 0.0081, "step": 50 }, { "epoch": 2.0, "eval_clip_ratio": 0.0, "eval_completion_length": 254.1, "eval_kl": 3.8304924964904784e-05, "eval_loss": 0.004257645923644304, "eval_num_tokens": 313952.0, "eval_reward": 0.0125, "eval_reward_std": 0.025, "eval_rewards/equation_reward_func": 0.0125, "eval_rewards/format_reward_func": 0.0, "eval_runtime": 282.4466, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 50 }, { "epoch": 2.2, "grad_norm": 0.027176212519407272, "learning_rate": 2.8e-07, "loss": 0.0077, "step": 55 }, { "epoch": 2.2, "eval_clip_ratio": 0.0, "eval_completion_length": 251.9625, "eval_kl": 4.739761352539063e-05, "eval_loss": 0.00685427850112319, "eval_num_tokens": 345452.0, "eval_reward": 0.025, "eval_reward_std": 0.05, "eval_rewards/equation_reward_func": 0.0125, "eval_rewards/format_reward_func": 0.0125, "eval_runtime": 283.0534, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 55 }, { "epoch": 2.4, "grad_norm": 0.0, "learning_rate": 2.1333333333333334e-07, "loss": 0.0039, "step": 60 }, { "epoch": 2.4, "eval_clip_ratio": 0.0, "eval_completion_length": 251.025, "eval_kl": 3.7276744842529296e-05, "eval_loss": 0.0007022842764854431, "eval_num_tokens": 376777.0, "eval_reward": 0.0125, "eval_reward_std": 0.025, "eval_rewards/equation_reward_func": 0.0125, "eval_rewards/format_reward_func": 0.0, "eval_runtime": 283.0247, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 60 }, { "epoch": 2.6, "grad_norm": 0.01980462484061718, "learning_rate": 1.4666666666666666e-07, "loss": 0.016, "step": 65 }, { "epoch": 2.6, "eval_clip_ratio": 0.0, "eval_completion_length": 251.0625, "eval_kl": 4.115998744964599e-05, "eval_loss": 0.010943427681922913, "eval_num_tokens": 407811.0, "eval_reward": 0.0625, "eval_reward_std": 0.125, "eval_rewards/equation_reward_func": 0.05, "eval_rewards/format_reward_func": 0.0125, "eval_runtime": 283.1384, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 65 }, { "epoch": 2.8, "grad_norm": 8.191254892153665e-07, "learning_rate": 8e-08, "loss": 0.0001, "step": 70 }, { "epoch": 2.8, "eval_clip_ratio": 0.0, "eval_completion_length": 251.125, "eval_kl": 4.7346949577331544e-05, "eval_loss": 0.0024404774885624647, "eval_num_tokens": 439212.0, "eval_reward": 0.0375, "eval_reward_std": 0.075, "eval_rewards/equation_reward_func": 0.0375, "eval_rewards/format_reward_func": 0.0, "eval_runtime": 283.0414, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 70 }, { "epoch": 3.0, "grad_norm": 0.02634088508784771, "learning_rate": 1.3333333333333334e-08, "loss": 0.0145, "step": 75 }, { "epoch": 3.0, "eval_clip_ratio": 0.0, "eval_completion_length": 252.075, "eval_kl": 4.2390823364257815e-05, "eval_loss": -0.0067981877364218235, "eval_num_tokens": 470623.0, "eval_reward": 0.0125, "eval_reward_std": 0.025, "eval_rewards/equation_reward_func": 0.0125, "eval_rewards/format_reward_func": 0.0, "eval_runtime": 283.5777, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.018, "step": 75 } ], "logging_steps": 5, "max_steps": 75, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }