{ "epoch": 0.9934640522875817, "eval_logits/chosen": -0.3643321692943573, "eval_logits/rejected": -0.4401712417602539, "eval_logps/chosen": -394.849853515625, "eval_logps/rejected": -451.100830078125, "eval_loss": 0.6792144775390625, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -0.15057459473609924, "eval_rewards/margins": 0.030382413417100906, "eval_rewards/rejected": -0.18095700442790985, "eval_runtime": 28.9938, "eval_samples": 20, "eval_samples_per_second": 0.69, "eval_steps_per_second": 0.172, "total_flos": 0.0, "train_loss": 0.6808264882940995, "train_runtime": 1624.4693, "train_samples": 612, "train_samples_per_second": 0.377, "train_steps_per_second": 0.047 }