llama31-8b-dpo-qlora-test / all_results.json
deepakkarkala's picture
End of training
e8508f6 verified
raw
history blame contribute delete
766 Bytes
{
"epoch": 0.9934640522875817,
"eval_logits/chosen": -0.3643321692943573,
"eval_logits/rejected": -0.4401712417602539,
"eval_logps/chosen": -394.849853515625,
"eval_logps/rejected": -451.100830078125,
"eval_loss": 0.6792144775390625,
"eval_rewards/accuracies": 0.75,
"eval_rewards/chosen": -0.15057459473609924,
"eval_rewards/margins": 0.030382413417100906,
"eval_rewards/rejected": -0.18095700442790985,
"eval_runtime": 28.9938,
"eval_samples": 20,
"eval_samples_per_second": 0.69,
"eval_steps_per_second": 0.172,
"total_flos": 0.0,
"train_loss": 0.6808264882940995,
"train_runtime": 1624.4693,
"train_samples": 612,
"train_samples_per_second": 0.377,
"train_steps_per_second": 0.047
}