{ "epoch": 0.9990600241708071, "eval_logits/chosen": -15.815413475036621, "eval_logits/rejected": -15.843463897705078, "eval_logps/chosen": -1.809948444366455, "eval_logps/rejected": -2.2990801334381104, "eval_loss": 2.546043872833252, "eval_rewards/accuracies": 0.7827869057655334, "eval_rewards/chosen": -18.099485397338867, "eval_rewards/margins": 4.891315937042236, "eval_rewards/rejected": -22.990798950195312, "eval_runtime": 107.9864, "eval_samples": 1941, "eval_samples_per_second": 17.974, "eval_steps_per_second": 1.13 }