dpo-selective-buffer-spo-shift / all_results.json
wxzhang's picture
Model save
5d84c8e verified
raw
history blame contribute delete
858 Bytes
{
"epoch": 1.0,
"eval_logits/chosen": -2.0824697017669678,
"eval_logits/rejected": -1.830767273902893,
"eval_logps/chosen": -131.0029296875,
"eval_logps/rejected": -92.43484497070312,
"eval_loss": 0.6776853799819946,
"eval_rewards/accuracies": 0.4693247675895691,
"eval_rewards/chosen": -0.13705651462078094,
"eval_rewards/margins": -0.0540921576321125,
"eval_rewards/rejected": -0.08296435326337814,
"eval_rewards/safe_rewards": -0.1332445591688156,
"eval_rewards/unsafe_rewards": -0.12632378935813904,
"eval_runtime": 2192.4744,
"eval_samples": 33044,
"eval_samples_per_second": 15.072,
"eval_steps_per_second": 0.942,
"train_loss": 67.04043597990268,
"train_runtime": 46860.0347,
"train_samples": 59478,
"train_samples_per_second": 1.269,
"train_steps_per_second": 0.04
}