{ "epoch": 0.9982631930527722, "eval_logits/chosen": -1.711490511894226, "eval_logits/rejected": -1.721152424812317, "eval_logps/chosen": -2.1099510192871094, "eval_logps/rejected": -2.973426342010498, "eval_loss": 1.1311538219451904, "eval_rewards/accuracies": 0.869918704032898, "eval_rewards/chosen": -21.09950828552246, "eval_rewards/margins": 8.634754180908203, "eval_rewards/rejected": -29.734262466430664, "eval_runtime": 110.5362, "eval_samples": 1961, "eval_samples_per_second": 17.741, "eval_steps_per_second": 1.113, "total_flos": 0.0, "train_loss": 2.0977548006004643, "train_runtime": 12789.999, "train_samples": 59876, "train_samples_per_second": 4.681, "train_steps_per_second": 0.037 }