{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 100, "global_step": 156, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.125e-07, "logits/chosen": -2.5929787158966064, "logits/rejected": -2.3793699741363525, "logps/chosen": -356.751953125, "logps/rejected": -256.8883972167969, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.06, "learning_rate": 3.125e-06, "logits/chosen": -2.6083781719207764, "logits/rejected": -2.5400590896606445, "logps/chosen": -281.386474609375, "logps/rejected": -274.1568603515625, "loss": 0.6914, "rewards/accuracies": 0.5243055820465088, "rewards/chosen": 0.0022270558401942253, "rewards/margins": 0.003119000233709812, "rewards/rejected": -0.0008919446263462305, "step": 10 }, { "epoch": 0.13, "learning_rate": 4.989935734988098e-06, "logits/chosen": -2.486896276473999, "logits/rejected": -2.3741025924682617, "logps/chosen": -291.9944152832031, "logps/rejected": -283.07464599609375, "loss": 0.6597, "rewards/accuracies": 0.625, "rewards/chosen": -0.09328436851501465, "rewards/margins": 0.07672096788883209, "rewards/rejected": -0.17000532150268555, "step": 20 }, { "epoch": 0.19, "learning_rate": 4.8776412907378845e-06, "logits/chosen": -2.1997439861297607, "logits/rejected": -2.036345958709717, "logps/chosen": -316.4314880371094, "logps/rejected": -308.1321716308594, "loss": 0.6302, "rewards/accuracies": 0.671875, "rewards/chosen": -0.1515791118144989, "rewards/margins": 0.188942089676857, "rewards/rejected": -0.3405211865901947, "step": 30 }, { "epoch": 0.26, "learning_rate": 4.646121984004666e-06, "logits/chosen": -1.8272978067398071, "logits/rejected": -1.5499597787857056, "logps/chosen": -305.67230224609375, "logps/rejected": -331.5001525878906, "loss": 0.5798, "rewards/accuracies": 0.734375, "rewards/chosen": -0.24510908126831055, "rewards/margins": 0.3586480915546417, "rewards/rejected": -0.6037572026252747, "step": 40 }, { "epoch": 0.32, "learning_rate": 4.3069871595684795e-06, "logits/chosen": -1.7314808368682861, "logits/rejected": -1.506835699081421, "logps/chosen": -304.4190979003906, "logps/rejected": -337.5626220703125, "loss": 0.5634, "rewards/accuracies": 0.734375, "rewards/chosen": -0.17085711658000946, "rewards/margins": 0.44418996572494507, "rewards/rejected": -0.6150471568107605, "step": 50 }, { "epoch": 0.38, "learning_rate": 3.8772424536302565e-06, "logits/chosen": -1.5548713207244873, "logits/rejected": -1.3132641315460205, "logps/chosen": -330.44970703125, "logps/rejected": -364.156982421875, "loss": 0.5846, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.3525199890136719, "rewards/margins": 0.41284093260765076, "rewards/rejected": -0.765360951423645, "step": 60 }, { "epoch": 0.45, "learning_rate": 3.3784370602033572e-06, "logits/chosen": -1.5187108516693115, "logits/rejected": -1.3706837892532349, "logps/chosen": -297.07177734375, "logps/rejected": -337.2922058105469, "loss": 0.5864, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.18305912613868713, "rewards/margins": 0.4047318398952484, "rewards/rejected": -0.5877909660339355, "step": 70 }, { "epoch": 0.51, "learning_rate": 2.835583164544139e-06, "logits/chosen": -1.4858272075653076, "logits/rejected": -1.1535447835922241, "logps/chosen": -327.71722412109375, "logps/rejected": -370.8907165527344, "loss": 0.5698, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.3798934817314148, "rewards/margins": 0.4692471921443939, "rewards/rejected": -0.8491406440734863, "step": 80 }, { "epoch": 0.58, "learning_rate": 2.2759017277414165e-06, "logits/chosen": -1.491620421409607, "logits/rejected": -1.186694860458374, "logps/chosen": -359.1547546386719, "logps/rejected": -382.53643798828125, "loss": 0.5597, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.4140586853027344, "rewards/margins": 0.5172749161720276, "rewards/rejected": -0.9313337206840515, "step": 90 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.4925267696380615, "logits/rejected": -1.1801958084106445, "logps/chosen": -324.1100158691406, "logps/rejected": -351.0680236816406, "loss": 0.5505, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.26746606826782227, "rewards/margins": 0.5357456803321838, "rewards/rejected": -0.8032118082046509, "step": 100 }, { "epoch": 0.64, "eval_logits/chosen": -1.4883873462677002, "eval_logits/rejected": -1.14678955078125, "eval_logps/chosen": -336.07965087890625, "eval_logps/rejected": -357.667724609375, "eval_loss": 0.5654380917549133, "eval_rewards/accuracies": 0.6980000138282776, "eval_rewards/chosen": -0.3523660898208618, "eval_rewards/margins": 0.4186323583126068, "eval_rewards/rejected": -0.7709984183311462, "eval_runtime": 384.3678, "eval_samples_per_second": 5.203, "eval_steps_per_second": 0.65, "step": 100 }, { "epoch": 0.7, "learning_rate": 1.217751806485235e-06, "logits/chosen": -1.4903645515441895, "logits/rejected": -1.2258186340332031, "logps/chosen": -337.6324462890625, "logps/rejected": -371.979736328125, "loss": 0.5624, "rewards/accuracies": 0.6781250238418579, "rewards/chosen": -0.31235161423683167, "rewards/margins": 0.42215317487716675, "rewards/rejected": -0.7345048189163208, "step": 110 }, { "epoch": 0.77, "learning_rate": 7.723433775328385e-07, "logits/chosen": -1.3396466970443726, "logits/rejected": -1.0177139043807983, "logps/chosen": -356.86505126953125, "logps/rejected": -363.9344177246094, "loss": 0.5657, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.4063766598701477, "rewards/margins": 0.3915051817893982, "rewards/rejected": -0.7978818416595459, "step": 120 }, { "epoch": 0.83, "learning_rate": 4.1356686569674344e-07, "logits/chosen": -1.36991286277771, "logits/rejected": -1.0875458717346191, "logps/chosen": -304.4653015136719, "logps/rejected": -349.29547119140625, "loss": 0.5471, "rewards/accuracies": 0.734375, "rewards/chosen": -0.30865973234176636, "rewards/margins": 0.5011726021766663, "rewards/rejected": -0.8098322749137878, "step": 130 }, { "epoch": 0.9, "learning_rate": 1.59412823400657e-07, "logits/chosen": -1.2946439981460571, "logits/rejected": -0.9784806370735168, "logps/chosen": -325.2296142578125, "logps/rejected": -366.6056823730469, "loss": 0.5506, "rewards/accuracies": 0.71875, "rewards/chosen": -0.31376656889915466, "rewards/margins": 0.5263808369636536, "rewards/rejected": -0.8401473760604858, "step": 140 }, { "epoch": 0.96, "learning_rate": 2.262559558016325e-08, "logits/chosen": -1.3523244857788086, "logits/rejected": -1.0796093940734863, "logps/chosen": -307.21832275390625, "logps/rejected": -342.11212158203125, "loss": 0.5508, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39782753586769104, "rewards/margins": 0.5240110158920288, "rewards/rejected": -0.9218384623527527, "step": 150 }, { "epoch": 1.0, "step": 156, "total_flos": 0.0, "train_loss": 0.5835713033492749, "train_runtime": 7184.2955, "train_samples_per_second": 2.784, "train_steps_per_second": 0.022 } ], "logging_steps": 10, "max_steps": 156, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }