{ "epoch": 0.9989701338825953, "eval_logits/chosen": -2.072295904159546, "eval_logits/rejected": -1.9723700284957886, "eval_logps/chosen": -326.1224060058594, "eval_logps/rejected": -410.1681213378906, "eval_loss": 0.530189573764801, "eval_rewards/accuracies": 0.7421875, "eval_rewards/chosen": -1.0375562906265259, "eval_rewards/margins": 0.7862817049026489, "eval_rewards/rejected": -1.8238379955291748, "eval_runtime": 67.7016, "eval_samples_per_second": 14.771, "eval_steps_per_second": 0.236 }