{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.99581589958159, "eval_steps": 500, "global_step": 119, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.166666666666666e-08, "logits/chosen": -2.305778980255127, "logits/rejected": -2.224325656890869, "logps/chosen": -270.25244140625, "logps/rejected": -383.8268127441406, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.394684314727783, "logits/rejected": -2.4029245376586914, "logps/chosen": -217.4110565185547, "logps/rejected": -323.976318359375, "loss": 0.6897, "rewards/accuracies": 0.5416666865348816, "rewards/chosen": -0.010575544089078903, "rewards/margins": 0.014531731605529785, "rewards/rejected": -0.02510727569460869, "step": 10 }, { "epoch": 0.17, "learning_rate": 4.931352528237397e-07, "logits/chosen": -2.293762683868408, "logits/rejected": -2.2932822704315186, "logps/chosen": -260.174560546875, "logps/rejected": -345.7673034667969, "loss": 0.6705, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.30990877747535706, "rewards/margins": 0.16011206805706024, "rewards/rejected": -0.47002077102661133, "step": 20 }, { "epoch": 0.25, "learning_rate": 4.658920803689553e-07, "logits/chosen": -2.2889437675476074, "logits/rejected": -2.2858946323394775, "logps/chosen": -283.4850158691406, "logps/rejected": -348.92822265625, "loss": 0.6862, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.3383755683898926, "rewards/margins": 0.19907911121845245, "rewards/rejected": -0.5374546647071838, "step": 30 }, { "epoch": 0.33, "learning_rate": 4.201712553872657e-07, "logits/chosen": -2.295836925506592, "logits/rejected": -2.288512706756592, "logps/chosen": -269.2564392089844, "logps/rejected": -314.68768310546875, "loss": 0.6686, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.06676869094371796, "rewards/margins": 0.07462646812200546, "rewards/rejected": -0.14139513671398163, "step": 40 }, { "epoch": 0.42, "learning_rate": 3.598859066780754e-07, "logits/chosen": -2.2943179607391357, "logits/rejected": -2.2712674140930176, "logps/chosen": -272.5740051269531, "logps/rejected": -336.09210205078125, "loss": 0.6653, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.16639836132526398, "rewards/margins": 0.1392066478729248, "rewards/rejected": -0.30560502409935, "step": 50 }, { "epoch": 0.5, "learning_rate": 2.9019570347986706e-07, "logits/chosen": -2.3181185722351074, "logits/rejected": -2.3116087913513184, "logps/chosen": -279.1436462402344, "logps/rejected": -351.61798095703125, "loss": 0.6473, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.30815738439559937, "rewards/margins": 0.14108426868915558, "rewards/rejected": -0.44924163818359375, "step": 60 }, { "epoch": 0.59, "learning_rate": 2.1706525253979534e-07, "logits/chosen": -2.3251430988311768, "logits/rejected": -2.2963356971740723, "logps/chosen": -280.5970458984375, "logps/rejected": -364.6551208496094, "loss": 0.651, "rewards/accuracies": 0.625, "rewards/chosen": -0.3270416855812073, "rewards/margins": 0.1887568086385727, "rewards/rejected": -0.5157985091209412, "step": 70 }, { "epoch": 0.67, "learning_rate": 1.4675360263490295e-07, "logits/chosen": -2.308258056640625, "logits/rejected": -2.2962486743927, "logps/chosen": -282.85528564453125, "logps/rejected": -327.85491943359375, "loss": 0.6311, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2814248204231262, "rewards/margins": 0.1915736347436905, "rewards/rejected": -0.4729984700679779, "step": 80 }, { "epoch": 0.75, "learning_rate": 8.527854855097224e-08, "logits/chosen": -2.2949376106262207, "logits/rejected": -2.3052544593811035, "logps/chosen": -250.7218017578125, "logps/rejected": -336.38140869140625, "loss": 0.6149, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.2139570415019989, "rewards/margins": 0.24394384026527405, "rewards/rejected": -0.45790091156959534, "step": 90 }, { "epoch": 0.84, "learning_rate": 3.790158337517127e-08, "logits/chosen": -2.319019317626953, "logits/rejected": -2.298459529876709, "logps/chosen": -291.30572509765625, "logps/rejected": -360.62750244140625, "loss": 0.6427, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.31846240162849426, "rewards/margins": 0.2448347508907318, "rewards/rejected": -0.5632971525192261, "step": 100 }, { "epoch": 0.92, "learning_rate": 8.677580722139671e-09, "logits/chosen": -2.3022727966308594, "logits/rejected": -2.3168272972106934, "logps/chosen": -281.0862731933594, "logps/rejected": -347.0366516113281, "loss": 0.6439, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.29117727279663086, "rewards/margins": 0.17553743720054626, "rewards/rejected": -0.4667147099971771, "step": 110 }, { "epoch": 1.0, "step": 119, "total_flos": 0.0, "train_loss": 0.6554931792892328, "train_runtime": 1987.3183, "train_samples_per_second": 7.69, "train_steps_per_second": 0.06 } ], "logging_steps": 10, "max_steps": 119, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }