{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 48, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020833333333333332, "grad_norm": 8.362677556536177, "learning_rate": 1e-07, "logits/chosen": -2.8258156776428223, "logits/rejected": -2.859372615814209, "logps/chosen": -247.59397888183594, "logps/pi_response": -72.04409790039062, "logps/ref_response": -72.04409790039062, "logps/rejected": -180.8425750732422, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.20833333333333334, "grad_norm": 9.049468118004324, "learning_rate": 4.83504027183137e-07, "logits/chosen": -2.75138783454895, "logits/rejected": -2.7332353591918945, "logps/chosen": -219.43704223632812, "logps/pi_response": -65.90155029296875, "logps/ref_response": -65.22713470458984, "logps/rejected": -164.3558807373047, "loss": 0.6878, "rewards/accuracies": 0.5868055820465088, "rewards/chosen": 0.015296363271772861, "rewards/margins": 0.014029696583747864, "rewards/rejected": 0.0012666649417951703, "step": 10 }, { "epoch": 0.4166666666666667, "grad_norm": 6.837906491432947, "learning_rate": 3.643105808261596e-07, "logits/chosen": -2.722661256790161, "logits/rejected": -2.6922712326049805, "logps/chosen": -250.12081909179688, "logps/pi_response": -79.08014678955078, "logps/ref_response": -66.51771545410156, "logps/rejected": -163.59561157226562, "loss": 0.6554, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 0.03164178133010864, "rewards/margins": 0.09398074448108673, "rewards/rejected": -0.06233896687626839, "step": 20 }, { "epoch": 0.625, "grad_norm": 6.600290233933933, "learning_rate": 1.8676665440207977e-07, "logits/chosen": -2.642151355743408, "logits/rejected": -2.608363389968872, "logps/chosen": -258.04754638671875, "logps/pi_response": -108.9358139038086, "logps/ref_response": -70.92167663574219, "logps/rejected": -199.85496520996094, "loss": 0.6245, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.08075634390115738, "rewards/margins": 0.20849159359931946, "rewards/rejected": -0.28924795985221863, "step": 30 }, { "epoch": 0.8333333333333334, "grad_norm": 6.86872847356136, "learning_rate": 4.1500545527530544e-08, "logits/chosen": -2.6706020832061768, "logits/rejected": -2.640864849090576, "logps/chosen": -272.2802734375, "logps/pi_response": -128.96005249023438, "logps/ref_response": -76.34043884277344, "logps/rejected": -220.1116943359375, "loss": 0.6048, "rewards/accuracies": 0.7093750238418579, "rewards/chosen": -0.16046889126300812, "rewards/margins": 0.2594737112522125, "rewards/rejected": -0.41994261741638184, "step": 40 }, { "epoch": 1.0, "step": 48, "total_flos": 0.0, "train_loss": 0.6321128209431967, "train_runtime": 2177.6416, "train_samples_per_second": 5.615, "train_steps_per_second": 0.022 } ], "logging_steps": 10, "max_steps": 48, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }