{ "epoch": 0.9982851866508377, "eval_logits/chosen": -3.2231318950653076, "eval_logits/rejected": -3.1088078022003174, "eval_logps/chosen": -623.1680297851562, "eval_logps/rejected": -685.9901733398438, "eval_loss": 0.6219658255577087, "eval_rewards/accuracies": 0.6350806355476379, "eval_rewards/chosen": -3.605348587036133, "eval_rewards/margins": 0.5176258683204651, "eval_rewards/rejected": -4.122974872589111, "eval_runtime": 146.9573, "eval_samples": 1976, "eval_samples_per_second": 13.446, "eval_steps_per_second": 0.844, "total_flos": 0.0, "train_loss": 0.545083115015171, "train_runtime": 9073.2474, "train_samples": 60643, "train_samples_per_second": 6.684, "train_steps_per_second": 0.052 }