vanilla_dpo_iter_5 / trainer_state.json
YYYYYYibo's picture
Model save
4ee5962 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9984,
"eval_steps": 100,
"global_step": 156,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.125e-07,
"logits/chosen": -2.5929787158966064,
"logits/rejected": -2.3793699741363525,
"logps/chosen": -356.751953125,
"logps/rejected": -256.8883972167969,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 3.125e-06,
"logits/chosen": -2.6083781719207764,
"logits/rejected": -2.5400590896606445,
"logps/chosen": -281.386474609375,
"logps/rejected": -274.1568603515625,
"loss": 0.6914,
"rewards/accuracies": 0.5243055820465088,
"rewards/chosen": 0.0022270558401942253,
"rewards/margins": 0.003119000233709812,
"rewards/rejected": -0.0008919446263462305,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.989935734988098e-06,
"logits/chosen": -2.486896276473999,
"logits/rejected": -2.3741025924682617,
"logps/chosen": -291.9944152832031,
"logps/rejected": -283.07464599609375,
"loss": 0.6597,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.09328436851501465,
"rewards/margins": 0.07672096788883209,
"rewards/rejected": -0.17000532150268555,
"step": 20
},
{
"epoch": 0.19,
"learning_rate": 4.8776412907378845e-06,
"logits/chosen": -2.1997439861297607,
"logits/rejected": -2.036345958709717,
"logps/chosen": -316.4314880371094,
"logps/rejected": -308.1321716308594,
"loss": 0.6302,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.1515791118144989,
"rewards/margins": 0.188942089676857,
"rewards/rejected": -0.3405211865901947,
"step": 30
},
{
"epoch": 0.26,
"learning_rate": 4.646121984004666e-06,
"logits/chosen": -1.8272978067398071,
"logits/rejected": -1.5499597787857056,
"logps/chosen": -305.67230224609375,
"logps/rejected": -331.5001525878906,
"loss": 0.5798,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.24510908126831055,
"rewards/margins": 0.3586480915546417,
"rewards/rejected": -0.6037572026252747,
"step": 40
},
{
"epoch": 0.32,
"learning_rate": 4.3069871595684795e-06,
"logits/chosen": -1.7314808368682861,
"logits/rejected": -1.506835699081421,
"logps/chosen": -304.4190979003906,
"logps/rejected": -337.5626220703125,
"loss": 0.5634,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.17085711658000946,
"rewards/margins": 0.44418996572494507,
"rewards/rejected": -0.6150471568107605,
"step": 50
},
{
"epoch": 0.38,
"learning_rate": 3.8772424536302565e-06,
"logits/chosen": -1.5548713207244873,
"logits/rejected": -1.3132641315460205,
"logps/chosen": -330.44970703125,
"logps/rejected": -364.156982421875,
"loss": 0.5846,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.3525199890136719,
"rewards/margins": 0.41284093260765076,
"rewards/rejected": -0.765360951423645,
"step": 60
},
{
"epoch": 0.45,
"learning_rate": 3.3784370602033572e-06,
"logits/chosen": -1.5187108516693115,
"logits/rejected": -1.3706837892532349,
"logps/chosen": -297.07177734375,
"logps/rejected": -337.2922058105469,
"loss": 0.5864,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.18305912613868713,
"rewards/margins": 0.4047318398952484,
"rewards/rejected": -0.5877909660339355,
"step": 70
},
{
"epoch": 0.51,
"learning_rate": 2.835583164544139e-06,
"logits/chosen": -1.4858272075653076,
"logits/rejected": -1.1535447835922241,
"logps/chosen": -327.71722412109375,
"logps/rejected": -370.8907165527344,
"loss": 0.5698,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.3798934817314148,
"rewards/margins": 0.4692471921443939,
"rewards/rejected": -0.8491406440734863,
"step": 80
},
{
"epoch": 0.58,
"learning_rate": 2.2759017277414165e-06,
"logits/chosen": -1.491620421409607,
"logits/rejected": -1.186694860458374,
"logps/chosen": -359.1547546386719,
"logps/rejected": -382.53643798828125,
"loss": 0.5597,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.4140586853027344,
"rewards/margins": 0.5172749161720276,
"rewards/rejected": -0.9313337206840515,
"step": 90
},
{
"epoch": 0.64,
"learning_rate": 1.7274575140626318e-06,
"logits/chosen": -1.4925267696380615,
"logits/rejected": -1.1801958084106445,
"logps/chosen": -324.1100158691406,
"logps/rejected": -351.0680236816406,
"loss": 0.5505,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.26746606826782227,
"rewards/margins": 0.5357456803321838,
"rewards/rejected": -0.8032118082046509,
"step": 100
},
{
"epoch": 0.64,
"eval_logits/chosen": -1.4883873462677002,
"eval_logits/rejected": -1.14678955078125,
"eval_logps/chosen": -336.07965087890625,
"eval_logps/rejected": -357.667724609375,
"eval_loss": 0.5654380917549133,
"eval_rewards/accuracies": 0.6980000138282776,
"eval_rewards/chosen": -0.3523660898208618,
"eval_rewards/margins": 0.4186323583126068,
"eval_rewards/rejected": -0.7709984183311462,
"eval_runtime": 384.3678,
"eval_samples_per_second": 5.203,
"eval_steps_per_second": 0.65,
"step": 100
},
{
"epoch": 0.7,
"learning_rate": 1.217751806485235e-06,
"logits/chosen": -1.4903645515441895,
"logits/rejected": -1.2258186340332031,
"logps/chosen": -337.6324462890625,
"logps/rejected": -371.979736328125,
"loss": 0.5624,
"rewards/accuracies": 0.6781250238418579,
"rewards/chosen": -0.31235161423683167,
"rewards/margins": 0.42215317487716675,
"rewards/rejected": -0.7345048189163208,
"step": 110
},
{
"epoch": 0.77,
"learning_rate": 7.723433775328385e-07,
"logits/chosen": -1.3396466970443726,
"logits/rejected": -1.0177139043807983,
"logps/chosen": -356.86505126953125,
"logps/rejected": -363.9344177246094,
"loss": 0.5657,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.4063766598701477,
"rewards/margins": 0.3915051817893982,
"rewards/rejected": -0.7978818416595459,
"step": 120
},
{
"epoch": 0.83,
"learning_rate": 4.1356686569674344e-07,
"logits/chosen": -1.36991286277771,
"logits/rejected": -1.0875458717346191,
"logps/chosen": -304.4653015136719,
"logps/rejected": -349.29547119140625,
"loss": 0.5471,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.30865973234176636,
"rewards/margins": 0.5011726021766663,
"rewards/rejected": -0.8098322749137878,
"step": 130
},
{
"epoch": 0.9,
"learning_rate": 1.59412823400657e-07,
"logits/chosen": -1.2946439981460571,
"logits/rejected": -0.9784806370735168,
"logps/chosen": -325.2296142578125,
"logps/rejected": -366.6056823730469,
"loss": 0.5506,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.31376656889915466,
"rewards/margins": 0.5263808369636536,
"rewards/rejected": -0.8401473760604858,
"step": 140
},
{
"epoch": 0.96,
"learning_rate": 2.262559558016325e-08,
"logits/chosen": -1.3523244857788086,
"logits/rejected": -1.0796093940734863,
"logps/chosen": -307.21832275390625,
"logps/rejected": -342.11212158203125,
"loss": 0.5508,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.39782753586769104,
"rewards/margins": 0.5240110158920288,
"rewards/rejected": -0.9218384623527527,
"step": 150
},
{
"epoch": 1.0,
"step": 156,
"total_flos": 0.0,
"train_loss": 0.5835713033492749,
"train_runtime": 7184.2955,
"train_samples_per_second": 2.784,
"train_steps_per_second": 0.022
}
],
"logging_steps": 10,
"max_steps": 156,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}