{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6019110676397562, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 8.000000000000001e-06, "logits/chosen": -58.747344970703125, "logits/rejected": -59.84019470214844, "logps/chosen": -150.3143768310547, "logps/rejected": -179.38966369628906, "loss": 0.5314, "rewards/accuracies": 0.7318750023841858, "rewards/chosen": -0.8851571679115295, "rewards/margins": 0.6559739708900452, "rewards/rejected": -1.5411310195922852, "step": 200 }, { "epoch": 0.24, "learning_rate": 9.68421052631579e-06, "logits/chosen": -64.05355072021484, "logits/rejected": -65.13407897949219, "logps/chosen": -155.71795654296875, "logps/rejected": -190.02166748046875, "loss": 0.4485, "rewards/accuracies": 0.7715625166893005, "rewards/chosen": -1.3824467658996582, "rewards/margins": 1.1924123764038086, "rewards/rejected": -2.5748589038848877, "step": 400 }, { "epoch": 0.36, "learning_rate": 9.263157894736842e-06, "logits/chosen": -64.59612274169922, "logits/rejected": -65.59567260742188, "logps/chosen": -158.51266479492188, "logps/rejected": -191.44497680664062, "loss": 0.4208, "rewards/accuracies": 0.7871875166893005, "rewards/chosen": -1.4413859844207764, "rewards/margins": 1.3812304735183716, "rewards/rejected": -2.8226163387298584, "step": 600 }, { "epoch": 0.48, "learning_rate": 8.842105263157895e-06, "logits/chosen": -65.70255279541016, "logits/rejected": -66.5833969116211, "logps/chosen": -156.6477508544922, "logps/rejected": -195.2394561767578, "loss": 0.4062, "rewards/accuracies": 0.7973437309265137, "rewards/chosen": -1.4948838949203491, "rewards/margins": 1.513500690460205, "rewards/rejected": -3.0083847045898438, "step": 800 }, { "epoch": 0.6, "learning_rate": 8.421052631578948e-06, "logits/chosen": -65.27395629882812, "logits/rejected": -66.06521606445312, "logps/chosen": -156.45945739746094, "logps/rejected": -196.63790893554688, "loss": 0.3898, "rewards/accuracies": 0.7978125214576721, "rewards/chosen": -1.6718982458114624, "rewards/margins": 1.6537814140319824, "rewards/rejected": -3.3256795406341553, "step": 1000 } ], "logging_steps": 200, "max_steps": 5000, "num_train_epochs": 4, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }