{ "epoch": 0.9984168865435357, "eval_logits/chosen": -1.955096960067749, "eval_logits/rejected": -1.8253438472747803, "eval_logps/chosen": -507.80810546875, "eval_logps/rejected": -551.9722900390625, "eval_loss": 0.621675431728363, "eval_rewards/accuracies": 0.6619433164596558, "eval_rewards/chosen": -2.3081862926483154, "eval_rewards/margins": 0.3548743426799774, "eval_rewards/rejected": -2.663060188293457, "eval_runtime": 317.6956, "eval_samples": 1976, "eval_samples_per_second": 6.22, "eval_steps_per_second": 1.555 }