GPT2_sft_and_dpo_tuned / trainer_state.json
RaushanTurganbay's picture
Upload folder using huggingface_hub
7d3e531
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.6019110676397562,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"learning_rate": 8.000000000000001e-06,
"logits/chosen": -58.747344970703125,
"logits/rejected": -59.84019470214844,
"logps/chosen": -150.3143768310547,
"logps/rejected": -179.38966369628906,
"loss": 0.5314,
"rewards/accuracies": 0.7318750023841858,
"rewards/chosen": -0.8851571679115295,
"rewards/margins": 0.6559739708900452,
"rewards/rejected": -1.5411310195922852,
"step": 200
},
{
"epoch": 0.24,
"learning_rate": 9.68421052631579e-06,
"logits/chosen": -64.05355072021484,
"logits/rejected": -65.13407897949219,
"logps/chosen": -155.71795654296875,
"logps/rejected": -190.02166748046875,
"loss": 0.4485,
"rewards/accuracies": 0.7715625166893005,
"rewards/chosen": -1.3824467658996582,
"rewards/margins": 1.1924123764038086,
"rewards/rejected": -2.5748589038848877,
"step": 400
},
{
"epoch": 0.36,
"learning_rate": 9.263157894736842e-06,
"logits/chosen": -64.59612274169922,
"logits/rejected": -65.59567260742188,
"logps/chosen": -158.51266479492188,
"logps/rejected": -191.44497680664062,
"loss": 0.4208,
"rewards/accuracies": 0.7871875166893005,
"rewards/chosen": -1.4413859844207764,
"rewards/margins": 1.3812304735183716,
"rewards/rejected": -2.8226163387298584,
"step": 600
},
{
"epoch": 0.48,
"learning_rate": 8.842105263157895e-06,
"logits/chosen": -65.70255279541016,
"logits/rejected": -66.5833969116211,
"logps/chosen": -156.6477508544922,
"logps/rejected": -195.2394561767578,
"loss": 0.4062,
"rewards/accuracies": 0.7973437309265137,
"rewards/chosen": -1.4948838949203491,
"rewards/margins": 1.513500690460205,
"rewards/rejected": -3.0083847045898438,
"step": 800
},
{
"epoch": 0.6,
"learning_rate": 8.421052631578948e-06,
"logits/chosen": -65.27395629882812,
"logits/rejected": -66.06521606445312,
"logps/chosen": -156.45945739746094,
"logps/rejected": -196.63790893554688,
"loss": 0.3898,
"rewards/accuracies": 0.7978125214576721,
"rewards/chosen": -1.6718982458114624,
"rewards/margins": 1.6537814140319824,
"rewards/rejected": -3.3256795406341553,
"step": 1000
}
],
"logging_steps": 200,
"max_steps": 5000,
"num_train_epochs": 4,
"save_steps": 500,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}