{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.99581589958159, "eval_steps": 500, "global_step": 119, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.166666666666666e-08, "logits/chosen": -2.7608747482299805, "logits/rejected": -2.7489399909973145, "logps/chosen": -156.13702392578125, "logps/rejected": -214.59707641601562, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.7645390033721924, "logits/rejected": -2.704571008682251, "logps/chosen": -192.93963623046875, "logps/rejected": -218.26573181152344, "loss": 0.6928, "rewards/accuracies": 0.4444444477558136, "rewards/chosen": 0.0014097224920988083, "rewards/margins": 0.0015009460039436817, "rewards/rejected": -9.122348274104297e-05, "step": 10 }, { "epoch": 0.17, "learning_rate": 4.931352528237397e-07, "logits/chosen": -2.6521761417388916, "logits/rejected": -2.614973545074463, "logps/chosen": -236.9346466064453, "logps/rejected": -204.9691162109375, "loss": 0.686, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.07020659744739532, "rewards/margins": 0.029431456699967384, "rewards/rejected": -0.09963803738355637, "step": 20 }, { "epoch": 0.25, "learning_rate": 4.658920803689553e-07, "logits/chosen": -2.56916880607605, "logits/rejected": -2.5743496417999268, "logps/chosen": -238.0299072265625, "logps/rejected": -208.71658325195312, "loss": 0.685, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.2192213088274002, "rewards/margins": 0.04515828937292099, "rewards/rejected": -0.2643795907497406, "step": 30 }, { "epoch": 0.33, "learning_rate": 4.201712553872657e-07, "logits/chosen": -2.51991605758667, "logits/rejected": -2.5045738220214844, "logps/chosen": -258.259033203125, "logps/rejected": -245.266845703125, "loss": 0.6645, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.2803400158882141, "rewards/margins": 0.07451216131448746, "rewards/rejected": -0.35485216975212097, "step": 40 }, { "epoch": 0.42, "learning_rate": 3.598859066780754e-07, "logits/chosen": -2.519888401031494, "logits/rejected": -2.50127911567688, "logps/chosen": -257.92901611328125, "logps/rejected": -247.06884765625, "loss": 0.6622, "rewards/accuracies": 0.59375, "rewards/chosen": -0.3478809893131256, "rewards/margins": 0.08011214435100555, "rewards/rejected": -0.42799311876296997, "step": 50 }, { "epoch": 0.5, "learning_rate": 2.9019570347986706e-07, "logits/chosen": -2.436131000518799, "logits/rejected": -2.4517111778259277, "logps/chosen": -284.99066162109375, "logps/rejected": -233.84280395507812, "loss": 0.66, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.3800959289073944, "rewards/margins": 0.2344251424074173, "rewards/rejected": -0.6145211458206177, "step": 60 }, { "epoch": 0.59, "learning_rate": 2.1706525253979534e-07, "logits/chosen": -2.4360086917877197, "logits/rejected": -2.440263032913208, "logps/chosen": -264.9581604003906, "logps/rejected": -267.7023010253906, "loss": 0.6551, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.4374156892299652, "rewards/margins": 0.17235831916332245, "rewards/rejected": -0.6097739338874817, "step": 70 }, { "epoch": 0.67, "learning_rate": 1.4675360263490295e-07, "logits/chosen": -2.4380290508270264, "logits/rejected": -2.4010090827941895, "logps/chosen": -236.95388793945312, "logps/rejected": -257.6996154785156, "loss": 0.6616, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4010644853115082, "rewards/margins": 0.13567090034484863, "rewards/rejected": -0.536735475063324, "step": 80 }, { "epoch": 0.75, "learning_rate": 8.527854855097224e-08, "logits/chosen": -2.469369888305664, "logits/rejected": -2.4206488132476807, "logps/chosen": -275.33831787109375, "logps/rejected": -252.06930541992188, "loss": 0.6579, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.4325632154941559, "rewards/margins": 0.17887099087238312, "rewards/rejected": -0.611434280872345, "step": 90 }, { "epoch": 0.84, "learning_rate": 3.790158337517127e-08, "logits/chosen": -2.4000065326690674, "logits/rejected": -2.405733585357666, "logps/chosen": -256.33343505859375, "logps/rejected": -261.50762939453125, "loss": 0.6521, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.44618505239486694, "rewards/margins": 0.14844560623168945, "rewards/rejected": -0.5946307182312012, "step": 100 }, { "epoch": 0.92, "learning_rate": 8.677580722139671e-09, "logits/chosen": -2.432537078857422, "logits/rejected": -2.425053358078003, "logps/chosen": -262.3310546875, "logps/rejected": -261.58697509765625, "loss": 0.6741, "rewards/accuracies": 0.53125, "rewards/chosen": -0.6105460524559021, "rewards/margins": -0.03789714723825455, "rewards/rejected": -0.5726489424705505, "step": 110 }, { "epoch": 1.0, "step": 119, "total_flos": 0.0, "train_loss": 0.66752010233262, "train_runtime": 1991.7897, "train_samples_per_second": 7.673, "train_steps_per_second": 0.06 } ], "logging_steps": 10, "max_steps": 119, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }