{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 100,
  "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-07,
      "logits/chosen": -2.5929787158966064,
      "logits/rejected": -2.3793699741363525,
      "logps/chosen": -356.751953125,
      "logps/rejected": -256.8883972167969,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.125e-06,
      "logits/chosen": -2.6083781719207764,
      "logits/rejected": -2.5400590896606445,
      "logps/chosen": -281.386474609375,
      "logps/rejected": -274.1568603515625,
      "loss": 0.6914,
      "rewards/accuracies": 0.5243055820465088,
      "rewards/chosen": 0.0022270558401942253,
      "rewards/margins": 0.003119000233709812,
      "rewards/rejected": -0.0008919446263462305,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.989935734988098e-06,
      "logits/chosen": -2.486896276473999,
      "logits/rejected": -2.3741025924682617,
      "logps/chosen": -291.9944152832031,
      "logps/rejected": -283.07464599609375,
      "loss": 0.6597,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.09328436851501465,
      "rewards/margins": 0.07672096788883209,
      "rewards/rejected": -0.17000532150268555,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.8776412907378845e-06,
      "logits/chosen": -2.1997439861297607,
      "logits/rejected": -2.036345958709717,
      "logps/chosen": -316.4314880371094,
      "logps/rejected": -308.1321716308594,
      "loss": 0.6302,
      "rewards/accuracies": 0.671875,
      "rewards/chosen": -0.1515791118144989,
      "rewards/margins": 0.188942089676857,
      "rewards/rejected": -0.3405211865901947,
      "step": 30
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.646121984004666e-06,
      "logits/chosen": -1.8272978067398071,
      "logits/rejected": -1.5499597787857056,
      "logps/chosen": -305.67230224609375,
      "logps/rejected": -331.5001525878906,
      "loss": 0.5798,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": -0.24510908126831055,
      "rewards/margins": 0.3586480915546417,
      "rewards/rejected": -0.6037572026252747,
      "step": 40
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.3069871595684795e-06,
      "logits/chosen": -1.7314808368682861,
      "logits/rejected": -1.506835699081421,
      "logps/chosen": -304.4190979003906,
      "logps/rejected": -337.5626220703125,
      "loss": 0.5634,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": -0.17085711658000946,
      "rewards/margins": 0.44418996572494507,
      "rewards/rejected": -0.6150471568107605,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.8772424536302565e-06,
      "logits/chosen": -1.5548713207244873,
      "logits/rejected": -1.3132641315460205,
      "logps/chosen": -330.44970703125,
      "logps/rejected": -364.156982421875,
      "loss": 0.5846,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.3525199890136719,
      "rewards/margins": 0.41284093260765076,
      "rewards/rejected": -0.765360951423645,
      "step": 60
    },
    {
      "epoch": 0.45,
      "learning_rate": 3.3784370602033572e-06,
      "logits/chosen": -1.5187108516693115,
      "logits/rejected": -1.3706837892532349,
      "logps/chosen": -297.07177734375,
      "logps/rejected": -337.2922058105469,
      "loss": 0.5864,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.18305912613868713,
      "rewards/margins": 0.4047318398952484,
      "rewards/rejected": -0.5877909660339355,
      "step": 70
    },
    {
      "epoch": 0.51,
      "learning_rate": 2.835583164544139e-06,
      "logits/chosen": -1.4858272075653076,
      "logits/rejected": -1.1535447835922241,
      "logps/chosen": -327.71722412109375,
      "logps/rejected": -370.8907165527344,
      "loss": 0.5698,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.3798934817314148,
      "rewards/margins": 0.4692471921443939,
      "rewards/rejected": -0.8491406440734863,
      "step": 80
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.2759017277414165e-06,
      "logits/chosen": -1.491620421409607,
      "logits/rejected": -1.186694860458374,
      "logps/chosen": -359.1547546386719,
      "logps/rejected": -382.53643798828125,
      "loss": 0.5597,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.4140586853027344,
      "rewards/margins": 0.5172749161720276,
      "rewards/rejected": -0.9313337206840515,
      "step": 90
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.7274575140626318e-06,
      "logits/chosen": -1.4925267696380615,
      "logits/rejected": -1.1801958084106445,
      "logps/chosen": -324.1100158691406,
      "logps/rejected": -351.0680236816406,
      "loss": 0.5505,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.26746606826782227,
      "rewards/margins": 0.5357456803321838,
      "rewards/rejected": -0.8032118082046509,
      "step": 100
    },
    {
      "epoch": 0.64,
      "eval_logits/chosen": -1.4883873462677002,
      "eval_logits/rejected": -1.14678955078125,
      "eval_logps/chosen": -336.07965087890625,
      "eval_logps/rejected": -357.667724609375,
      "eval_loss": 0.5654380917549133,
      "eval_rewards/accuracies": 0.6980000138282776,
      "eval_rewards/chosen": -0.3523660898208618,
      "eval_rewards/margins": 0.4186323583126068,
      "eval_rewards/rejected": -0.7709984183311462,
      "eval_runtime": 384.3678,
      "eval_samples_per_second": 5.203,
      "eval_steps_per_second": 0.65,
      "step": 100
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.217751806485235e-06,
      "logits/chosen": -1.4903645515441895,
      "logits/rejected": -1.2258186340332031,
      "logps/chosen": -337.6324462890625,
      "logps/rejected": -371.979736328125,
      "loss": 0.5624,
      "rewards/accuracies": 0.6781250238418579,
      "rewards/chosen": -0.31235161423683167,
      "rewards/margins": 0.42215317487716675,
      "rewards/rejected": -0.7345048189163208,
      "step": 110
    },
    {
      "epoch": 0.77,
      "learning_rate": 7.723433775328385e-07,
      "logits/chosen": -1.3396466970443726,
      "logits/rejected": -1.0177139043807983,
      "logps/chosen": -356.86505126953125,
      "logps/rejected": -363.9344177246094,
      "loss": 0.5657,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.4063766598701477,
      "rewards/margins": 0.3915051817893982,
      "rewards/rejected": -0.7978818416595459,
      "step": 120
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.1356686569674344e-07,
      "logits/chosen": -1.36991286277771,
      "logits/rejected": -1.0875458717346191,
      "logps/chosen": -304.4653015136719,
      "logps/rejected": -349.29547119140625,
      "loss": 0.5471,
      "rewards/accuracies": 0.734375,
      "rewards/chosen": -0.30865973234176636,
      "rewards/margins": 0.5011726021766663,
      "rewards/rejected": -0.8098322749137878,
      "step": 130
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.59412823400657e-07,
      "logits/chosen": -1.2946439981460571,
      "logits/rejected": -0.9784806370735168,
      "logps/chosen": -325.2296142578125,
      "logps/rejected": -366.6056823730469,
      "loss": 0.5506,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.31376656889915466,
      "rewards/margins": 0.5263808369636536,
      "rewards/rejected": -0.8401473760604858,
      "step": 140
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.262559558016325e-08,
      "logits/chosen": -1.3523244857788086,
      "logits/rejected": -1.0796093940734863,
      "logps/chosen": -307.21832275390625,
      "logps/rejected": -342.11212158203125,
      "loss": 0.5508,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.39782753586769104,
      "rewards/margins": 0.5240110158920288,
      "rewards/rejected": -0.9218384623527527,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 156,
      "total_flos": 0.0,
      "train_loss": 0.5835713033492749,
      "train_runtime": 7184.2955,
      "train_samples_per_second": 2.784,
      "train_steps_per_second": 0.022
    }
  ],
  "logging_steps": 10,
  "max_steps": 156,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}