{ "epoch": 0.9997382884061764, "eval_logits/chosen": -1.103013515472412, "eval_logits/rejected": -0.990799069404602, "eval_logps/chosen": -520.2218017578125, "eval_logps/rejected": -606.2373046875, "eval_loss": 0.4944952428340912, "eval_rewards/accuracies": 0.7777777910232544, "eval_rewards/chosen": -2.5530030727386475, "eval_rewards/margins": 1.0629230737686157, "eval_rewards/rejected": -3.6159262657165527, "eval_runtime": 318.8167, "eval_samples": 2000, "eval_samples_per_second": 6.273, "eval_steps_per_second": 0.198, "total_flos": 0.0, "train_loss": 0.5319095570379527, "train_runtime": 23762.0752, "train_samples": 61134, "train_samples_per_second": 2.573, "train_steps_per_second": 0.04 }