{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008, "grad_norm": 226.62270447693234, "learning_rate": 0.0, "logits/chosen": -0.91796875, "logits/rejected": -1.0625, "logps/chosen": -448.0, "logps/rejected": -404.0, "loss": 0.6934, "nll_loss": 2.625, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.008, "grad_norm": 183.46648450042892, "learning_rate": 3.6e-08, "logits/chosen": -0.9357638955116272, "logits/rejected": -1.0073784589767456, "logps/chosen": -528.4444580078125, "logps/rejected": -375.77777099609375, "loss": 0.7075, "nll_loss": 1.7126736640930176, "rewards/accuracies": 0.3055555522441864, "rewards/chosen": -0.0180528424680233, "rewards/margins": -0.006869846023619175, "rewards/rejected": -0.011135525070130825, "step": 10 }, { "epoch": 0.016, "grad_norm": 195.8050525947057, "learning_rate": 7.599999999999999e-08, "logits/chosen": -0.8521484136581421, "logits/rejected": -1.0, "logps/chosen": -412.79998779296875, "logps/rejected": -408.6000061035156, "loss": 0.6575, "nll_loss": 2.35546875, "rewards/accuracies": 0.5, "rewards/chosen": -0.005053711123764515, "rewards/margins": 0.09255371242761612, "rewards/rejected": -0.09763183444738388, "step": 20 }, { "epoch": 0.024, "grad_norm": 141.87495839689677, "learning_rate": 1.16e-07, "logits/chosen": -0.848437488079071, "logits/rejected": -0.8382812738418579, "logps/chosen": -521.5999755859375, "logps/rejected": -380.20001220703125, "loss": 0.5052, "nll_loss": 1.78515625, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.14396972954273224, "rewards/margins": 0.4627929627895355, "rewards/rejected": -0.31914061307907104, "step": 30 }, { "epoch": 0.032, "grad_norm": 125.39420097312498, "learning_rate": 1.56e-07, "logits/chosen": -0.8394531011581421, "logits/rejected": -0.969921886920929, "logps/chosen": -529.0, "logps/rejected": -401.6000061035156, "loss": 0.3009, "nll_loss": 1.8390624523162842, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.3280273377895355, "rewards/margins": 1.265039086341858, "rewards/rejected": -0.9378906488418579, "step": 40 }, { "epoch": 0.04, "grad_norm": 51.38963253406093, "learning_rate": 1.96e-07, "logits/chosen": -0.811328113079071, "logits/rejected": -0.971484363079071, "logps/chosen": -459.20001220703125, "logps/rejected": -411.6000061035156, "loss": 0.179, "nll_loss": 2.192187547683716, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.33916014432907104, "rewards/margins": 2.1109375953674316, "rewards/rejected": -1.771875023841858, "step": 50 }, { "epoch": 0.048, "grad_norm": 41.54844453932047, "learning_rate": 2.3599999999999997e-07, "logits/chosen": -0.710888683795929, "logits/rejected": -0.852734386920929, "logps/chosen": -450.20001220703125, "logps/rejected": -423.3999938964844, "loss": 0.0706, "nll_loss": 2.2906250953674316, "rewards/accuracies": 1.0, "rewards/chosen": -0.0625, "rewards/margins": 3.278125047683716, "rewards/rejected": -3.340625047683716, "step": 60 }, { "epoch": 0.056, "grad_norm": 2.8975377631106283, "learning_rate": 2.7600000000000004e-07, "logits/chosen": -0.756054699420929, "logits/rejected": -0.7701171636581421, "logps/chosen": -454.79998779296875, "logps/rejected": -425.79998779296875, "loss": 0.0485, "nll_loss": 2.09375, "rewards/accuracies": 1.0, "rewards/chosen": -0.03144531324505806, "rewards/margins": 4.420312404632568, "rewards/rejected": -4.453125, "step": 70 }, { "epoch": 0.064, "grad_norm": 1.9819883761762331, "learning_rate": 3.1599999999999997e-07, "logits/chosen": -0.6548827886581421, "logits/rejected": -0.584765613079071, "logps/chosen": -516.2000122070312, "logps/rejected": -442.0, "loss": 0.0136, "nll_loss": 1.96875, "rewards/accuracies": 1.0, "rewards/chosen": 0.05791015550494194, "rewards/margins": 5.881249904632568, "rewards/rejected": -5.818749904632568, "step": 80 }, { "epoch": 0.072, "grad_norm": 40.33622312322741, "learning_rate": 3.5599999999999996e-07, "logits/chosen": -0.5289062261581421, "logits/rejected": -0.5241454839706421, "logps/chosen": -460.3999938964844, "logps/rejected": -450.3999938964844, "loss": 0.0126, "nll_loss": 2.077343702316284, "rewards/accuracies": 1.0, "rewards/chosen": 0.30329591035842896, "rewards/margins": 6.993750095367432, "rewards/rejected": -6.690625190734863, "step": 90 }, { "epoch": 0.08, "grad_norm": 3.381840804687941, "learning_rate": 3.96e-07, "logits/chosen": -0.6162109375, "logits/rejected": -0.5665038824081421, "logps/chosen": -516.5999755859375, "logps/rejected": -461.6000061035156, "loss": 0.0103, "nll_loss": 2.063281297683716, "rewards/accuracies": 1.0, "rewards/chosen": 0.47880858182907104, "rewards/margins": 8.15625, "rewards/rejected": -7.675000190734863, "step": 100 }, { "epoch": 0.088, "grad_norm": 0.022971788286646016, "learning_rate": 4.36e-07, "logits/chosen": -0.555468738079071, "logits/rejected": -0.558398425579071, "logps/chosen": -509.6000061035156, "logps/rejected": -469.3999938964844, "loss": 0.0069, "nll_loss": 1.841406226158142, "rewards/accuracies": 1.0, "rewards/chosen": 0.911425769329071, "rewards/margins": 9.612500190734863, "rewards/rejected": -8.693750381469727, "step": 110 }, { "epoch": 0.096, "grad_norm": 0.6089138686903683, "learning_rate": 4.76e-07, "logits/chosen": -0.43085938692092896, "logits/rejected": -0.44287109375, "logps/chosen": -460.3999938964844, "logps/rejected": -492.6000061035156, "loss": 0.0024, "nll_loss": 1.9765625, "rewards/accuracies": 1.0, "rewards/chosen": 1.300390601158142, "rewards/margins": 10.537500381469727, "rewards/rejected": -9.240625381469727, "step": 120 }, { "epoch": 0.104, "grad_norm": 5.072145285053, "learning_rate": 4.982222222222223e-07, "logits/chosen": -0.39960938692092896, "logits/rejected": -0.3809570372104645, "logps/chosen": -470.6000061035156, "logps/rejected": -472.20001220703125, "loss": 0.0046, "nll_loss": 2.043750047683716, "rewards/accuracies": 1.0, "rewards/chosen": 2.507031202316284, "rewards/margins": 10.509374618530273, "rewards/rejected": -8.009374618530273, "step": 130 }, { "epoch": 0.112, "grad_norm": 0.6864472420728707, "learning_rate": 4.937777777777777e-07, "logits/chosen": -0.26445311307907104, "logits/rejected": -0.293212890625, "logps/chosen": -411.0, "logps/rejected": -460.79998779296875, "loss": 0.0023, "nll_loss": 1.904687523841858, "rewards/accuracies": 1.0, "rewards/chosen": 3.862499952316284, "rewards/margins": 11.34375, "rewards/rejected": -7.487500190734863, "step": 140 }, { "epoch": 0.12, "grad_norm": 0.04499814020001256, "learning_rate": 4.893333333333333e-07, "logits/chosen": -0.2533935606479645, "logits/rejected": -0.30218505859375, "logps/chosen": -407.20001220703125, "logps/rejected": -467.20001220703125, "loss": 0.003, "nll_loss": 1.5851562023162842, "rewards/accuracies": 1.0, "rewards/chosen": 5.1015625, "rewards/margins": 12.21875, "rewards/rejected": -7.109375, "step": 150 }, { "epoch": 0.128, "grad_norm": 0.12456933423635125, "learning_rate": 4.848888888888888e-07, "logits/chosen": -0.5003906488418579, "logits/rejected": -0.4310546815395355, "logps/chosen": -409.79998779296875, "logps/rejected": -465.6000061035156, "loss": 0.0118, "nll_loss": 1.7273437976837158, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.496874809265137, "rewards/margins": 12.262499809265137, "rewards/rejected": -6.771874904632568, "step": 160 }, { "epoch": 0.136, "grad_norm": 0.10810136696878427, "learning_rate": 4.804444444444444e-07, "logits/chosen": -0.4216064512729645, "logits/rejected": -0.3792480528354645, "logps/chosen": -384.0, "logps/rejected": -446.3999938964844, "loss": 0.0022, "nll_loss": 1.6687500476837158, "rewards/accuracies": 1.0, "rewards/chosen": 6.856249809265137, "rewards/margins": 12.15625, "rewards/rejected": -5.293749809265137, "step": 170 }, { "epoch": 0.144, "grad_norm": 2.1017761962617016, "learning_rate": 4.76e-07, "logits/chosen": -0.3038574159145355, "logits/rejected": -0.32097166776657104, "logps/chosen": -426.20001220703125, "logps/rejected": -432.79998779296875, "loss": 0.0031, "nll_loss": 1.6843750476837158, "rewards/accuracies": 1.0, "rewards/chosen": 8.0, "rewards/margins": 12.050000190734863, "rewards/rejected": -4.051562309265137, "step": 180 }, { "epoch": 0.152, "grad_norm": 0.03472137716422411, "learning_rate": 4.7155555555555556e-07, "logits/chosen": -0.27385252714157104, "logits/rejected": -0.274169921875, "logps/chosen": -383.3999938964844, "logps/rejected": -433.0, "loss": 0.002, "nll_loss": 1.497656226158142, "rewards/accuracies": 1.0, "rewards/chosen": 9.46875, "rewards/margins": 14.081250190734863, "rewards/rejected": -4.620312690734863, "step": 190 }, { "epoch": 0.16, "grad_norm": 0.05628090345177454, "learning_rate": 4.6711111111111104e-07, "logits/chosen": -0.3796752989292145, "logits/rejected": -0.342529296875, "logps/chosen": -361.0, "logps/rejected": -430.0, "loss": 0.0017, "nll_loss": 1.515625, "rewards/accuracies": 1.0, "rewards/chosen": 9.209375381469727, "rewards/margins": 13.231249809265137, "rewards/rejected": -4.025781154632568, "step": 200 }, { "epoch": 0.168, "grad_norm": 0.03469897920654829, "learning_rate": 4.6266666666666663e-07, "logits/chosen": -0.30029296875, "logits/rejected": -0.2890380918979645, "logps/chosen": -372.70001220703125, "logps/rejected": -418.20001220703125, "loss": 0.0019, "nll_loss": 1.4328124523162842, "rewards/accuracies": 1.0, "rewards/chosen": 10.162500381469727, "rewards/margins": 13.068750381469727, "rewards/rejected": -2.887939453125, "step": 210 }, { "epoch": 0.176, "grad_norm": 0.03297401557091858, "learning_rate": 4.5822222222222216e-07, "logits/chosen": -0.4486328065395355, "logits/rejected": -0.3726562559604645, "logps/chosen": -398.6000061035156, "logps/rejected": -430.79998779296875, "loss": 0.0016, "nll_loss": 1.484375, "rewards/accuracies": 1.0, "rewards/chosen": 10.693750381469727, "rewards/margins": 14.068750381469727, "rewards/rejected": -3.3890624046325684, "step": 220 }, { "epoch": 0.184, "grad_norm": 0.03928262686521911, "learning_rate": 4.5377777777777775e-07, "logits/chosen": -0.31196290254592896, "logits/rejected": -0.2955078184604645, "logps/chosen": -329.6000061035156, "logps/rejected": -432.6000061035156, "loss": 0.0013, "nll_loss": 1.2960937023162842, "rewards/accuracies": 1.0, "rewards/chosen": 11.287500381469727, "rewards/margins": 15.443750381469727, "rewards/rejected": -4.154687404632568, "step": 230 }, { "epoch": 0.192, "grad_norm": 0.012808641512081923, "learning_rate": 4.493333333333333e-07, "logits/chosen": -0.28032225370407104, "logits/rejected": -0.2730468809604645, "logps/chosen": -390.1000061035156, "logps/rejected": -418.20001220703125, "loss": 0.0017, "nll_loss": 1.353124976158142, "rewards/accuracies": 1.0, "rewards/chosen": 11.774999618530273, "rewards/margins": 14.8125, "rewards/rejected": -3.0433592796325684, "step": 240 }, { "epoch": 0.2, "grad_norm": 0.06025316567815392, "learning_rate": 4.4488888888888887e-07, "logits/chosen": -0.2566772401332855, "logits/rejected": -0.19821777939796448, "logps/chosen": -371.20001220703125, "logps/rejected": -412.6000061035156, "loss": 0.0014, "nll_loss": 1.3125, "rewards/accuracies": 1.0, "rewards/chosen": 11.987500190734863, "rewards/margins": 14.800000190734863, "rewards/rejected": -2.801953077316284, "step": 250 }, { "epoch": 0.208, "grad_norm": 0.018239198980973682, "learning_rate": 4.4044444444444445e-07, "logits/chosen": -0.3375000059604645, "logits/rejected": -0.22773437201976776, "logps/chosen": -367.70001220703125, "logps/rejected": -419.79998779296875, "loss": 0.0016, "nll_loss": 1.3914062976837158, "rewards/accuracies": 1.0, "rewards/chosen": 13.081250190734863, "rewards/margins": 15.800000190734863, "rewards/rejected": -2.72265625, "step": 260 }, { "epoch": 0.216, "grad_norm": 0.06735412914324118, "learning_rate": 4.36e-07, "logits/chosen": -0.30003660917282104, "logits/rejected": -0.3238281309604645, "logps/chosen": -345.8999938964844, "logps/rejected": -427.0, "loss": 0.01, "nll_loss": 1.340234398841858, "rewards/accuracies": 1.0, "rewards/chosen": 11.768750190734863, "rewards/margins": 15.125, "rewards/rejected": -3.364453077316284, "step": 270 }, { "epoch": 0.224, "grad_norm": 0.01881116895313125, "learning_rate": 4.3155555555555557e-07, "logits/chosen": -0.29327392578125, "logits/rejected": -0.25737303495407104, "logps/chosen": -365.0, "logps/rejected": -431.3999938964844, "loss": 0.0036, "nll_loss": 1.2589843273162842, "rewards/accuracies": 1.0, "rewards/chosen": 12.668749809265137, "rewards/margins": 16.03125, "rewards/rejected": -3.3645873069763184, "step": 280 }, { "epoch": 0.232, "grad_norm": 0.033150944419182336, "learning_rate": 4.271111111111111e-07, "logits/chosen": -0.24541015923023224, "logits/rejected": -0.2640136778354645, "logps/chosen": -323.0, "logps/rejected": -439.0, "loss": 0.0014, "nll_loss": 1.3125, "rewards/accuracies": 1.0, "rewards/chosen": 12.925000190734863, "rewards/margins": 16.3125, "rewards/rejected": -3.383984327316284, "step": 290 }, { "epoch": 0.24, "grad_norm": 0.07622395451753027, "learning_rate": 4.226666666666667e-07, "logits/chosen": -0.34736329317092896, "logits/rejected": -0.31829530000686646, "logps/chosen": -317.1000061035156, "logps/rejected": -420.0, "loss": 0.0014, "nll_loss": 1.361718773841858, "rewards/accuracies": 1.0, "rewards/chosen": 13.631250381469727, "rewards/margins": 16.15625, "rewards/rejected": -2.510937452316284, "step": 300 }, { "epoch": 0.248, "grad_norm": 0.018683331713432866, "learning_rate": 4.1822222222222217e-07, "logits/chosen": -0.3045898377895355, "logits/rejected": -0.24216309189796448, "logps/chosen": -341.8999938964844, "logps/rejected": -413.0, "loss": 0.002, "nll_loss": 1.350000023841858, "rewards/accuracies": 1.0, "rewards/chosen": 13.699999809265137, "rewards/margins": 16.631250381469727, "rewards/rejected": -2.9205079078674316, "step": 310 }, { "epoch": 0.256, "grad_norm": 0.023574936630402193, "learning_rate": 4.1377777777777776e-07, "logits/chosen": -0.21635742485523224, "logits/rejected": -0.23710937798023224, "logps/chosen": -434.3999938964844, "logps/rejected": -407.3999938964844, "loss": 0.0022, "nll_loss": 1.337499976158142, "rewards/accuracies": 1.0, "rewards/chosen": 13.274999618530273, "rewards/margins": 15.524999618530273, "rewards/rejected": -2.2476563453674316, "step": 320 }, { "epoch": 0.264, "grad_norm": 0.03038779144828818, "learning_rate": 4.093333333333333e-07, "logits/chosen": -0.1950538605451584, "logits/rejected": -0.179931640625, "logps/chosen": -350.8999938964844, "logps/rejected": -419.20001220703125, "loss": 0.0127, "nll_loss": 1.1906249523162842, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 13.899999618530273, "rewards/margins": 16.506250381469727, "rewards/rejected": -2.590625047683716, "step": 330 }, { "epoch": 0.272, "grad_norm": 0.04436049431080709, "learning_rate": 4.048888888888889e-07, "logits/chosen": -0.24531249701976776, "logits/rejected": -0.2109375, "logps/chosen": -297.79998779296875, "logps/rejected": -413.6000061035156, "loss": 0.0016, "nll_loss": 1.203125, "rewards/accuracies": 1.0, "rewards/chosen": 14.46875, "rewards/margins": 16.325000762939453, "rewards/rejected": -1.864843726158142, "step": 340 }, { "epoch": 0.28, "grad_norm": 0.017501398014684644, "learning_rate": 4.004444444444444e-07, "logits/chosen": -0.329833984375, "logits/rejected": -0.34770506620407104, "logps/chosen": -308.5, "logps/rejected": -421.6000061035156, "loss": 0.0012, "nll_loss": 1.205468773841858, "rewards/accuracies": 1.0, "rewards/chosen": 14.143750190734863, "rewards/margins": 16.381250381469727, "rewards/rejected": -2.240673780441284, "step": 350 }, { "epoch": 0.288, "grad_norm": 0.028368395543364983, "learning_rate": 3.96e-07, "logits/chosen": -0.19560547173023224, "logits/rejected": -0.0811767578125, "logps/chosen": -310.3999938964844, "logps/rejected": -401.6000061035156, "loss": 0.0013, "nll_loss": 1.228906273841858, "rewards/accuracies": 1.0, "rewards/chosen": 15.112500190734863, "rewards/margins": 16.625, "rewards/rejected": -1.511328101158142, "step": 360 }, { "epoch": 0.296, "grad_norm": 0.03826354878861224, "learning_rate": 3.9155555555555553e-07, "logits/chosen": -0.21774902939796448, "logits/rejected": -0.11984863132238388, "logps/chosen": -343.70001220703125, "logps/rejected": -396.79998779296875, "loss": 0.0015, "nll_loss": 1.181249976158142, "rewards/accuracies": 1.0, "rewards/chosen": 14.868749618530273, "rewards/margins": 15.931249618530273, "rewards/rejected": -1.049218773841858, "step": 370 }, { "epoch": 0.304, "grad_norm": 0.015874061291675685, "learning_rate": 3.871111111111111e-07, "logits/chosen": -0.1663818359375, "logits/rejected": -0.06330566108226776, "logps/chosen": -333.70001220703125, "logps/rejected": -423.0, "loss": 0.0013, "nll_loss": 1.303125023841858, "rewards/accuracies": 1.0, "rewards/chosen": 14.887499809265137, "rewards/margins": 17.950000762939453, "rewards/rejected": -3.07421875, "step": 380 }, { "epoch": 0.312, "grad_norm": 0.019028625638946934, "learning_rate": 3.8266666666666665e-07, "logits/chosen": -0.13876953721046448, "logits/rejected": -0.10498046875, "logps/chosen": -383.70001220703125, "logps/rejected": -405.3999938964844, "loss": 0.0012, "nll_loss": 1.1964843273162842, "rewards/accuracies": 1.0, "rewards/chosen": 14.25, "rewards/margins": 16.649999618530273, "rewards/rejected": -2.4046874046325684, "step": 390 }, { "epoch": 0.32, "grad_norm": 0.019036097748468133, "learning_rate": 3.7822222222222224e-07, "logits/chosen": -0.3286499083042145, "logits/rejected": -0.28095704317092896, "logps/chosen": -329.79998779296875, "logps/rejected": -426.3999938964844, "loss": 0.0013, "nll_loss": 1.2902343273162842, "rewards/accuracies": 1.0, "rewards/chosen": 16.049999237060547, "rewards/margins": 18.825000762939453, "rewards/rejected": -2.77069091796875, "step": 400 }, { "epoch": 0.328, "grad_norm": 0.016730568465352296, "learning_rate": 3.7377777777777777e-07, "logits/chosen": -0.11003418266773224, "logits/rejected": -0.12824706733226776, "logps/chosen": -382.3999938964844, "logps/rejected": -416.0, "loss": 0.0026, "nll_loss": 1.211328148841858, "rewards/accuracies": 1.0, "rewards/chosen": 15.206250190734863, "rewards/margins": 17.387500762939453, "rewards/rejected": -2.163281202316284, "step": 410 }, { "epoch": 0.336, "grad_norm": 0.01850424460734669, "learning_rate": 3.693333333333333e-07, "logits/chosen": -0.16533203423023224, "logits/rejected": -0.1209716796875, "logps/chosen": -356.3999938964844, "logps/rejected": -413.79998779296875, "loss": 0.0012, "nll_loss": 1.1242187023162842, "rewards/accuracies": 1.0, "rewards/chosen": 15.637499809265137, "rewards/margins": 17.71875, "rewards/rejected": -2.0859375, "step": 420 }, { "epoch": 0.344, "grad_norm": 0.039652389470085536, "learning_rate": 3.6488888888888884e-07, "logits/chosen": -0.10117187350988388, "logits/rejected": -0.05829467624425888, "logps/chosen": -298.20001220703125, "logps/rejected": -420.6000061035156, "loss": 0.0012, "nll_loss": 1.1785156726837158, "rewards/accuracies": 1.0, "rewards/chosen": 15.581250190734863, "rewards/margins": 18.375, "rewards/rejected": -2.7708983421325684, "step": 430 }, { "epoch": 0.352, "grad_norm": 0.008419647898269315, "learning_rate": 3.604444444444444e-07, "logits/chosen": -0.24436035752296448, "logits/rejected": -0.1181640625, "logps/chosen": -296.29998779296875, "logps/rejected": -423.20001220703125, "loss": 0.0011, "nll_loss": 1.058984398841858, "rewards/accuracies": 1.0, "rewards/chosen": 16.268749237060547, "rewards/margins": 18.568750381469727, "rewards/rejected": -2.2855467796325684, "step": 440 }, { "epoch": 0.36, "grad_norm": 0.008531494272693656, "learning_rate": 3.5599999999999996e-07, "logits/chosen": -0.06640625, "logits/rejected": -0.0052490234375, "logps/chosen": -329.1000061035156, "logps/rejected": -404.6000061035156, "loss": 0.0016, "nll_loss": 1.16796875, "rewards/accuracies": 1.0, "rewards/chosen": 16.049999237060547, "rewards/margins": 18.012500762939453, "rewards/rejected": -1.9445312023162842, "step": 450 }, { "epoch": 0.368, "grad_norm": 0.021562461095979032, "learning_rate": 3.5155555555555554e-07, "logits/chosen": -0.17312011122703552, "logits/rejected": -0.07587890326976776, "logps/chosen": -316.8999938964844, "logps/rejected": -412.79998779296875, "loss": 0.0012, "nll_loss": 1.1652343273162842, "rewards/accuracies": 1.0, "rewards/chosen": 15.381250381469727, "rewards/margins": 17.799999237060547, "rewards/rejected": -2.4085936546325684, "step": 460 }, { "epoch": 0.376, "grad_norm": 0.07020322384992712, "learning_rate": 3.471111111111111e-07, "logits/chosen": -0.14414063096046448, "logits/rejected": -0.12218017876148224, "logps/chosen": -299.70001220703125, "logps/rejected": -428.0, "loss": 0.0017, "nll_loss": 1.25, "rewards/accuracies": 1.0, "rewards/chosen": 15.856249809265137, "rewards/margins": 18.456249237060547, "rewards/rejected": -2.5699219703674316, "step": 470 }, { "epoch": 0.384, "grad_norm": 0.012596113248962468, "learning_rate": 3.4266666666666666e-07, "logits/chosen": -0.08417968451976776, "logits/rejected": -0.03115234337747097, "logps/chosen": -302.79998779296875, "logps/rejected": -405.0, "loss": 0.0012, "nll_loss": 1.0558593273162842, "rewards/accuracies": 1.0, "rewards/chosen": 15.668749809265137, "rewards/margins": 17.862499237060547, "rewards/rejected": -2.1884765625, "step": 480 }, { "epoch": 0.392, "grad_norm": 0.007929870216062266, "learning_rate": 3.382222222222222e-07, "logits/chosen": -0.10902099311351776, "logits/rejected": -0.11533202975988388, "logps/chosen": -397.20001220703125, "logps/rejected": -404.6000061035156, "loss": 0.0038, "nll_loss": 1.135156273841858, "rewards/accuracies": 1.0, "rewards/chosen": 15.993749618530273, "rewards/margins": 18.862499237060547, "rewards/rejected": -2.874706983566284, "step": 490 }, { "epoch": 0.4, "grad_norm": 0.008630639702085227, "learning_rate": 3.337777777777778e-07, "logits/chosen": -0.26744383573532104, "logits/rejected": -0.14018554985523224, "logps/chosen": -318.6000061035156, "logps/rejected": -434.20001220703125, "loss": 0.0012, "nll_loss": 1.166406273841858, "rewards/accuracies": 1.0, "rewards/chosen": 16.037500381469727, "rewards/margins": 19.012500762939453, "rewards/rejected": -2.995312452316284, "step": 500 }, { "epoch": 0.408, "grad_norm": 0.016229293972070177, "learning_rate": 3.293333333333333e-07, "logits/chosen": -0.204833984375, "logits/rejected": -0.13259276747703552, "logps/chosen": -322.0, "logps/rejected": -407.79998779296875, "loss": 0.0022, "nll_loss": 1.116796851158142, "rewards/accuracies": 1.0, "rewards/chosen": 15.512499809265137, "rewards/margins": 18.28125, "rewards/rejected": -2.77734375, "step": 510 }, { "epoch": 0.416, "grad_norm": 0.5999767196042933, "learning_rate": 3.248888888888889e-07, "logits/chosen": -0.171142578125, "logits/rejected": -0.15626220405101776, "logps/chosen": -320.6000061035156, "logps/rejected": -428.20001220703125, "loss": 0.0013, "nll_loss": 1.1687500476837158, "rewards/accuracies": 1.0, "rewards/chosen": 16.162500381469727, "rewards/margins": 19.862499237060547, "rewards/rejected": -3.703125, "step": 520 }, { "epoch": 0.424, "grad_norm": 0.30677660404469975, "learning_rate": 3.204444444444444e-07, "logits/chosen": -0.20744629204273224, "logits/rejected": -0.15806885063648224, "logps/chosen": -359.0, "logps/rejected": -433.20001220703125, "loss": 0.0017, "nll_loss": 1.25390625, "rewards/accuracies": 1.0, "rewards/chosen": 15.912500381469727, "rewards/margins": 19.4375, "rewards/rejected": -3.549999952316284, "step": 530 }, { "epoch": 0.432, "grad_norm": 0.016384405120536898, "learning_rate": 3.1599999999999997e-07, "logits/chosen": -0.11655273288488388, "logits/rejected": -0.13643798232078552, "logps/chosen": -284.1000061035156, "logps/rejected": -408.6000061035156, "loss": 0.001, "nll_loss": 1.033593773841858, "rewards/accuracies": 1.0, "rewards/chosen": 16.856250762939453, "rewards/margins": 19.043750762939453, "rewards/rejected": -2.197338819503784, "step": 540 }, { "epoch": 0.44, "grad_norm": 0.02552403140799525, "learning_rate": 3.115555555555555e-07, "logits/chosen": -0.1761474609375, "logits/rejected": -0.08870239555835724, "logps/chosen": -308.8999938964844, "logps/rejected": -428.6000061035156, "loss": 0.0012, "nll_loss": 1.185156226158142, "rewards/accuracies": 1.0, "rewards/chosen": 17.200000762939453, "rewards/margins": 19.887500762939453, "rewards/rejected": -2.680468797683716, "step": 550 }, { "epoch": 0.448, "grad_norm": 0.012418272439184573, "learning_rate": 3.071111111111111e-07, "logits/chosen": -0.25361329317092896, "logits/rejected": -0.15983887016773224, "logps/chosen": -371.29998779296875, "logps/rejected": -401.3999938964844, "loss": 0.0012, "nll_loss": 1.234375, "rewards/accuracies": 1.0, "rewards/chosen": 16.643749237060547, "rewards/margins": 18.518749237060547, "rewards/rejected": -1.90234375, "step": 560 }, { "epoch": 0.456, "grad_norm": 0.008584798479513097, "learning_rate": 3.026666666666666e-07, "logits/chosen": -0.10489501804113388, "logits/rejected": 0.01859130896627903, "logps/chosen": -305.3999938964844, "logps/rejected": -411.20001220703125, "loss": 0.0012, "nll_loss": 1.1484375, "rewards/accuracies": 1.0, "rewards/chosen": 17.762500762939453, "rewards/margins": 19.325000762939453, "rewards/rejected": -1.558447241783142, "step": 570 }, { "epoch": 0.464, "grad_norm": 0.054564295459296766, "learning_rate": 2.982222222222222e-07, "logits/chosen": -0.12746581435203552, "logits/rejected": 0.02424316480755806, "logps/chosen": -309.5, "logps/rejected": -403.6000061035156, "loss": 0.0013, "nll_loss": 1.109765648841858, "rewards/accuracies": 1.0, "rewards/chosen": 16.78125, "rewards/margins": 18.862499237060547, "rewards/rejected": -2.0740723609924316, "step": 580 }, { "epoch": 0.472, "grad_norm": 0.021190418364761706, "learning_rate": 2.937777777777778e-07, "logits/chosen": -0.12922362983226776, "logits/rejected": -0.007800293155014515, "logps/chosen": -327.8999938964844, "logps/rejected": -372.6000061035156, "loss": 0.0012, "nll_loss": 1.1261718273162842, "rewards/accuracies": 1.0, "rewards/chosen": 16.299999237060547, "rewards/margins": 17.212499618530273, "rewards/rejected": -0.9365234375, "step": 590 }, { "epoch": 0.48, "grad_norm": 0.1273137285714855, "learning_rate": 2.8933333333333333e-07, "logits/chosen": -0.02729492262005806, "logits/rejected": 0.03876953199505806, "logps/chosen": -349.6000061035156, "logps/rejected": -420.6000061035156, "loss": 0.0013, "nll_loss": 1.221093773841858, "rewards/accuracies": 1.0, "rewards/chosen": 16.512500762939453, "rewards/margins": 20.143749237060547, "rewards/rejected": -3.608593702316284, "step": 600 }, { "epoch": 0.488, "grad_norm": 0.11206326927393837, "learning_rate": 2.848888888888889e-07, "logits/chosen": -0.05439453199505806, "logits/rejected": 0.04978637769818306, "logps/chosen": -288.8999938964844, "logps/rejected": -419.0, "loss": 0.0011, "nll_loss": 1.078125, "rewards/accuracies": 1.0, "rewards/chosen": 16.268749237060547, "rewards/margins": 18.625, "rewards/rejected": -2.3518919944763184, "step": 610 }, { "epoch": 0.496, "grad_norm": 0.006737027469415457, "learning_rate": 2.8044444444444445e-07, "logits/chosen": -0.06257323920726776, "logits/rejected": -0.02427978441119194, "logps/chosen": -282.6000061035156, "logps/rejected": -422.20001220703125, "loss": 0.0011, "nll_loss": 1.094140648841858, "rewards/accuracies": 1.0, "rewards/chosen": 16.34375, "rewards/margins": 19.587499618530273, "rewards/rejected": -3.24609375, "step": 620 }, { "epoch": 0.504, "grad_norm": 0.007960696014384233, "learning_rate": 2.7600000000000004e-07, "logits/chosen": -0.26665037870407104, "logits/rejected": -0.16494140028953552, "logps/chosen": -271.0, "logps/rejected": -415.3999938964844, "loss": 0.0011, "nll_loss": 1.0574219226837158, "rewards/accuracies": 1.0, "rewards/chosen": 17.25, "rewards/margins": 19.387500762939453, "rewards/rejected": -2.118847608566284, "step": 630 }, { "epoch": 0.512, "grad_norm": 0.012795263557863883, "learning_rate": 2.715555555555555e-07, "logits/chosen": -0.03950195387005806, "logits/rejected": 0.03125, "logps/chosen": -325.8999938964844, "logps/rejected": -407.79998779296875, "loss": 0.0111, "nll_loss": 1.0187499523162842, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 17.393749237060547, "rewards/margins": 19.674999237060547, "rewards/rejected": -2.2586913108825684, "step": 640 }, { "epoch": 0.52, "grad_norm": 0.01127355606086418, "learning_rate": 2.671111111111111e-07, "logits/chosen": -0.078369140625, "logits/rejected": -0.0323486328125, "logps/chosen": -289.0, "logps/rejected": -417.0, "loss": 0.001, "nll_loss": 1.0125000476837158, "rewards/accuracies": 1.0, "rewards/chosen": 17.318750381469727, "rewards/margins": 19.424999237060547, "rewards/rejected": -2.0975098609924316, "step": 650 }, { "epoch": 0.528, "grad_norm": 0.019808530454758836, "learning_rate": 2.6266666666666664e-07, "logits/chosen": -0.04782714694738388, "logits/rejected": 0.09812011569738388, "logps/chosen": -323.6000061035156, "logps/rejected": -409.6000061035156, "loss": 0.0011, "nll_loss": 1.1015625, "rewards/accuracies": 1.0, "rewards/chosen": 17.774999618530273, "rewards/margins": 20.549999237060547, "rewards/rejected": -2.7822265625, "step": 660 }, { "epoch": 0.536, "grad_norm": 0.021732913992860835, "learning_rate": 2.582222222222222e-07, "logits/chosen": 0.01522216759622097, "logits/rejected": 0.11748047173023224, "logps/chosen": -285.20001220703125, "logps/rejected": -419.20001220703125, "loss": 0.0011, "nll_loss": 1.078125, "rewards/accuracies": 1.0, "rewards/chosen": 17.850000381469727, "rewards/margins": 19.487499237060547, "rewards/rejected": -1.6281249523162842, "step": 670 }, { "epoch": 0.544, "grad_norm": 0.005724681559177148, "learning_rate": 2.5377777777777776e-07, "logits/chosen": 0.0006347656017169356, "logits/rejected": 0.07487793266773224, "logps/chosen": -316.0, "logps/rejected": -392.0, "loss": 0.0024, "nll_loss": 0.9683593511581421, "rewards/accuracies": 1.0, "rewards/chosen": 17.106250762939453, "rewards/margins": 18.868749618530273, "rewards/rejected": -1.749609351158142, "step": 680 }, { "epoch": 0.552, "grad_norm": 0.00968898581539461, "learning_rate": 2.493333333333333e-07, "logits/chosen": -0.03961181640625, "logits/rejected": 0.116943359375, "logps/chosen": -282.0, "logps/rejected": -428.20001220703125, "loss": 0.0011, "nll_loss": 1.0832030773162842, "rewards/accuracies": 1.0, "rewards/chosen": 17.96875, "rewards/margins": 21.024999618530273, "rewards/rejected": -3.087207078933716, "step": 690 }, { "epoch": 0.56, "grad_norm": 0.02206940002406898, "learning_rate": 2.448888888888889e-07, "logits/chosen": 0.05800781399011612, "logits/rejected": 0.04348144680261612, "logps/chosen": -305.8999938964844, "logps/rejected": -411.20001220703125, "loss": 0.0012, "nll_loss": 1.172265648841858, "rewards/accuracies": 1.0, "rewards/chosen": 17.575000762939453, "rewards/margins": 19.975000381469727, "rewards/rejected": -2.412109375, "step": 700 }, { "epoch": 0.568, "grad_norm": 0.01284076138380296, "learning_rate": 2.404444444444444e-07, "logits/chosen": -0.03164062649011612, "logits/rejected": 0.05482788011431694, "logps/chosen": -294.6000061035156, "logps/rejected": -401.6000061035156, "loss": 0.0011, "nll_loss": 1.074609398841858, "rewards/accuracies": 1.0, "rewards/chosen": 18.200000762939453, "rewards/margins": 19.387500762939453, "rewards/rejected": -1.1970703601837158, "step": 710 }, { "epoch": 0.576, "grad_norm": 0.007678737859862069, "learning_rate": 2.3599999999999997e-07, "logits/chosen": -0.03760986402630806, "logits/rejected": 0.05534667894244194, "logps/chosen": -310.20001220703125, "logps/rejected": -413.79998779296875, "loss": 0.0013, "nll_loss": 1.169531226158142, "rewards/accuracies": 1.0, "rewards/chosen": 17.600000381469727, "rewards/margins": 20.212499618530273, "rewards/rejected": -2.596874952316284, "step": 720 }, { "epoch": 0.584, "grad_norm": 0.020744736487327976, "learning_rate": 2.3155555555555553e-07, "logits/chosen": -0.02609863318502903, "logits/rejected": 0.0072021484375, "logps/chosen": -287.0, "logps/rejected": -421.79998779296875, "loss": 0.001, "nll_loss": 1.001953125, "rewards/accuracies": 1.0, "rewards/chosen": 17.375, "rewards/margins": 20.887500762939453, "rewards/rejected": -3.515942335128784, "step": 730 }, { "epoch": 0.592, "grad_norm": 0.010296671008794668, "learning_rate": 2.2711111111111112e-07, "logits/chosen": -0.06264648586511612, "logits/rejected": -0.03441772609949112, "logps/chosen": -281.20001220703125, "logps/rejected": -420.79998779296875, "loss": 0.001, "nll_loss": 0.9945312738418579, "rewards/accuracies": 1.0, "rewards/chosen": 17.787500381469727, "rewards/margins": 20.075000762939453, "rewards/rejected": -2.292163133621216, "step": 740 }, { "epoch": 0.6, "grad_norm": 0.01984847155435085, "learning_rate": 2.2266666666666668e-07, "logits/chosen": -0.0015625000232830644, "logits/rejected": 0.05145263671875, "logps/chosen": -328.6000061035156, "logps/rejected": -402.20001220703125, "loss": 0.0013, "nll_loss": 0.98046875, "rewards/accuracies": 1.0, "rewards/chosen": 17.600000381469727, "rewards/margins": 19.737499237060547, "rewards/rejected": -2.1128907203674316, "step": 750 }, { "epoch": 0.608, "grad_norm": 0.011414762899716836, "learning_rate": 2.1822222222222224e-07, "logits/chosen": 0.06040038913488388, "logits/rejected": -0.010241699405014515, "logps/chosen": -287.70001220703125, "logps/rejected": -402.79998779296875, "loss": 0.001, "nll_loss": 1.03125, "rewards/accuracies": 1.0, "rewards/chosen": 17.362499237060547, "rewards/margins": 19.549999237060547, "rewards/rejected": -2.1845703125, "step": 760 }, { "epoch": 0.616, "grad_norm": 0.011751018637081315, "learning_rate": 2.1377777777777777e-07, "logits/chosen": -0.10244140774011612, "logits/rejected": -0.09792480617761612, "logps/chosen": -279.29998779296875, "logps/rejected": -444.20001220703125, "loss": 0.0011, "nll_loss": 1.045312523841858, "rewards/accuracies": 1.0, "rewards/chosen": 18.112499237060547, "rewards/margins": 21.924999237060547, "rewards/rejected": -3.8414063453674316, "step": 770 }, { "epoch": 0.624, "grad_norm": 0.010739426686833121, "learning_rate": 2.0933333333333333e-07, "logits/chosen": -0.06641845405101776, "logits/rejected": -0.03793945163488388, "logps/chosen": -283.6000061035156, "logps/rejected": -433.6000061035156, "loss": 0.001, "nll_loss": 1.0441405773162842, "rewards/accuracies": 1.0, "rewards/chosen": 18.043750762939453, "rewards/margins": 22.350000381469727, "rewards/rejected": -4.30859375, "step": 780 }, { "epoch": 0.632, "grad_norm": 0.008894669609579957, "learning_rate": 2.048888888888889e-07, "logits/chosen": -0.13934326171875, "logits/rejected": 0.03299560397863388, "logps/chosen": -269.0, "logps/rejected": -435.0, "loss": 0.0026, "nll_loss": 1.0304687023162842, "rewards/accuracies": 1.0, "rewards/chosen": 17.850000381469727, "rewards/margins": 21.799999237060547, "rewards/rejected": -3.9339842796325684, "step": 790 }, { "epoch": 0.64, "grad_norm": 0.00781796371528786, "learning_rate": 2.0044444444444445e-07, "logits/chosen": -0.136871337890625, "logits/rejected": -0.06427001953125, "logps/chosen": -301.79998779296875, "logps/rejected": -441.20001220703125, "loss": 0.0011, "nll_loss": 1.0535156726837158, "rewards/accuracies": 1.0, "rewards/chosen": 17.856250762939453, "rewards/margins": 22.362499237060547, "rewards/rejected": -4.500781059265137, "step": 800 }, { "epoch": 0.648, "grad_norm": 0.006941095936510498, "learning_rate": 1.96e-07, "logits/chosen": -0.06254883110523224, "logits/rejected": -0.03428955003619194, "logps/chosen": -317.79998779296875, "logps/rejected": -424.6000061035156, "loss": 0.001, "nll_loss": 1.033593773841858, "rewards/accuracies": 1.0, "rewards/chosen": 17.875, "rewards/margins": 20.450000762939453, "rewards/rejected": -2.5609374046325684, "step": 810 }, { "epoch": 0.656, "grad_norm": 0.01161314226877997, "learning_rate": 1.9155555555555554e-07, "logits/chosen": -0.06083984300494194, "logits/rejected": 0.07171630859375, "logps/chosen": -300.0, "logps/rejected": -426.3999938964844, "loss": 0.0011, "nll_loss": 1.101171851158142, "rewards/accuracies": 1.0, "rewards/chosen": 18.8125, "rewards/margins": 22.337499618530273, "rewards/rejected": -3.5337891578674316, "step": 820 }, { "epoch": 0.664, "grad_norm": 0.04491917017356749, "learning_rate": 1.871111111111111e-07, "logits/chosen": -0.02180175855755806, "logits/rejected": -0.0004760742303915322, "logps/chosen": -272.1000061035156, "logps/rejected": -424.20001220703125, "loss": 0.001, "nll_loss": 1.0390625, "rewards/accuracies": 1.0, "rewards/chosen": 18.962499618530273, "rewards/margins": 22.612499237060547, "rewards/rejected": -3.6480469703674316, "step": 830 }, { "epoch": 0.672, "grad_norm": 0.014327809455407186, "learning_rate": 1.8266666666666666e-07, "logits/chosen": -0.06074218824505806, "logits/rejected": -0.0008728027460165322, "logps/chosen": -304.8999938964844, "logps/rejected": -395.3999938964844, "loss": 0.0016, "nll_loss": 1.0378906726837158, "rewards/accuracies": 1.0, "rewards/chosen": 18.612499237060547, "rewards/margins": 19.75, "rewards/rejected": -1.1486327648162842, "step": 840 }, { "epoch": 0.68, "grad_norm": 0.010011390264830125, "learning_rate": 1.7822222222222222e-07, "logits/chosen": -0.143798828125, "logits/rejected": -0.04060058668255806, "logps/chosen": -308.1000061035156, "logps/rejected": -433.6000061035156, "loss": 0.0031, "nll_loss": 1.0398437976837158, "rewards/accuracies": 1.0, "rewards/chosen": 18.887500762939453, "rewards/margins": 21.774999618530273, "rewards/rejected": -2.8993163108825684, "step": 850 }, { "epoch": 0.688, "grad_norm": 0.006838199221939372, "learning_rate": 1.7377777777777778e-07, "logits/chosen": 0.041259765625, "logits/rejected": 0.13017578423023224, "logps/chosen": -293.0, "logps/rejected": -432.0, "loss": 0.001, "nll_loss": 1.0363280773162842, "rewards/accuracies": 1.0, "rewards/chosen": 18.375, "rewards/margins": 21.825000762939453, "rewards/rejected": -3.4496092796325684, "step": 860 }, { "epoch": 0.696, "grad_norm": 0.011644152757164477, "learning_rate": 1.6933333333333334e-07, "logits/chosen": 0.08931884914636612, "logits/rejected": 0.07216797024011612, "logps/chosen": -299.3999938964844, "logps/rejected": -411.0, "loss": 0.001, "nll_loss": 0.9429687261581421, "rewards/accuracies": 1.0, "rewards/chosen": 19.487499237060547, "rewards/margins": 21.487499237060547, "rewards/rejected": -2.010546922683716, "step": 870 }, { "epoch": 0.704, "grad_norm": 0.01033541957175912, "learning_rate": 1.6488888888888887e-07, "logits/chosen": 0.09145507961511612, "logits/rejected": 0.15330810844898224, "logps/chosen": -304.79998779296875, "logps/rejected": -400.20001220703125, "loss": 0.0009, "nll_loss": 0.8675781488418579, "rewards/accuracies": 1.0, "rewards/chosen": 18.4375, "rewards/margins": 20.612499237060547, "rewards/rejected": -2.170703172683716, "step": 880 }, { "epoch": 0.712, "grad_norm": 0.008717665163684966, "learning_rate": 1.6044444444444443e-07, "logits/chosen": 0.10676269233226776, "logits/rejected": 0.12166748195886612, "logps/chosen": -274.5, "logps/rejected": -426.6000061035156, "loss": 0.001, "nll_loss": 0.9921875, "rewards/accuracies": 1.0, "rewards/chosen": 19.149999618530273, "rewards/margins": 22.774999618530273, "rewards/rejected": -3.625, "step": 890 }, { "epoch": 0.72, "grad_norm": 0.007924558579110246, "learning_rate": 1.56e-07, "logits/chosen": -0.11391601711511612, "logits/rejected": -0.02934570237994194, "logps/chosen": -266.8999938964844, "logps/rejected": -427.20001220703125, "loss": 0.0035, "nll_loss": 0.981249988079071, "rewards/accuracies": 1.0, "rewards/chosen": 18.662500381469727, "rewards/margins": 21.200000762939453, "rewards/rejected": -2.541015625, "step": 900 }, { "epoch": 0.728, "grad_norm": 0.07578643720533651, "learning_rate": 1.5155555555555555e-07, "logits/chosen": 0.07387695461511612, "logits/rejected": 0.15861816704273224, "logps/chosen": -239.5500030517578, "logps/rejected": -417.3999938964844, "loss": 0.0011, "nll_loss": 1.0226562023162842, "rewards/accuracies": 1.0, "rewards/chosen": 18.8125, "rewards/margins": 21.987499237060547, "rewards/rejected": -3.1800780296325684, "step": 910 }, { "epoch": 0.736, "grad_norm": 0.009494584129256885, "learning_rate": 1.4711111111111111e-07, "logits/chosen": -0.004748535342514515, "logits/rejected": 0.05303344875574112, "logps/chosen": -327.79998779296875, "logps/rejected": -390.79998779296875, "loss": 0.001, "nll_loss": 1.0050780773162842, "rewards/accuracies": 1.0, "rewards/chosen": 19.056249618530273, "rewards/margins": 20.575000762939453, "rewards/rejected": -1.507421851158142, "step": 920 }, { "epoch": 0.744, "grad_norm": 0.017667443139199403, "learning_rate": 1.4266666666666665e-07, "logits/chosen": -0.054931640625, "logits/rejected": 0.02968749962747097, "logps/chosen": -261.0, "logps/rejected": -418.0, "loss": 0.001, "nll_loss": 0.9671875238418579, "rewards/accuracies": 1.0, "rewards/chosen": 19.137500762939453, "rewards/margins": 21.331249237060547, "rewards/rejected": -2.1927733421325684, "step": 930 }, { "epoch": 0.752, "grad_norm": 0.010899226100416153, "learning_rate": 1.382222222222222e-07, "logits/chosen": -0.0184326171875, "logits/rejected": 0.11514892429113388, "logps/chosen": -288.6000061035156, "logps/rejected": -408.3999938964844, "loss": 0.0465, "nll_loss": 1.074609398841858, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 19.024999618530273, "rewards/margins": 21.262500762939453, "rewards/rejected": -2.2400145530700684, "step": 940 }, { "epoch": 0.76, "grad_norm": 0.025779260274360533, "learning_rate": 1.3377777777777777e-07, "logits/chosen": 0.02338867262005806, "logits/rejected": 0.10743407905101776, "logps/chosen": -330.6000061035156, "logps/rejected": -398.20001220703125, "loss": 0.001, "nll_loss": 1.0070312023162842, "rewards/accuracies": 1.0, "rewards/chosen": 18.756250381469727, "rewards/margins": 20.049999237060547, "rewards/rejected": -1.2770264148712158, "step": 950 }, { "epoch": 0.768, "grad_norm": 0.025413812683072193, "learning_rate": 1.2933333333333333e-07, "logits/chosen": 0.05156249925494194, "logits/rejected": 0.17954102158546448, "logps/chosen": -291.3999938964844, "logps/rejected": -413.0, "loss": 0.001, "nll_loss": 0.958984375, "rewards/accuracies": 1.0, "rewards/chosen": 19.924999237060547, "rewards/margins": 21.637500762939453, "rewards/rejected": -1.715429663658142, "step": 960 }, { "epoch": 0.776, "grad_norm": 0.008806349125691143, "learning_rate": 1.2488888888888889e-07, "logits/chosen": 0.02752685546875, "logits/rejected": 0.08073730766773224, "logps/chosen": -264.6000061035156, "logps/rejected": -407.6000061035156, "loss": 0.0009, "nll_loss": 0.8984375, "rewards/accuracies": 1.0, "rewards/chosen": 19.0, "rewards/margins": 20.237499237060547, "rewards/rejected": -1.255761742591858, "step": 970 }, { "epoch": 0.784, "grad_norm": 0.0098690579475145, "learning_rate": 1.2044444444444445e-07, "logits/chosen": 0.01823730394244194, "logits/rejected": 0.11467285454273224, "logps/chosen": -310.20001220703125, "logps/rejected": -413.6000061035156, "loss": 0.001, "nll_loss": 0.9722656011581421, "rewards/accuracies": 1.0, "rewards/chosen": 20.237499237060547, "rewards/margins": 22.325000762939453, "rewards/rejected": -2.0926756858825684, "step": 980 }, { "epoch": 0.792, "grad_norm": 0.010627117006645067, "learning_rate": 1.16e-07, "logits/chosen": -0.07388915866613388, "logits/rejected": 0.07421875, "logps/chosen": -342.8999938964844, "logps/rejected": -412.79998779296875, "loss": 0.001, "nll_loss": 1.040624976158142, "rewards/accuracies": 1.0, "rewards/chosen": 18.96875, "rewards/margins": 21.049999237060547, "rewards/rejected": -2.0562500953674316, "step": 990 }, { "epoch": 0.8, "grad_norm": 0.04218956204057828, "learning_rate": 1.1155555555555555e-07, "logits/chosen": -0.0010253905784338713, "logits/rejected": 0.06143798679113388, "logps/chosen": -290.70001220703125, "logps/rejected": -412.6000061035156, "loss": 0.0011, "nll_loss": 1.09375, "rewards/accuracies": 1.0, "rewards/chosen": 19.962499618530273, "rewards/margins": 22.0, "rewards/rejected": -2.0201172828674316, "step": 1000 }, { "epoch": 0.808, "grad_norm": 0.014065236545009229, "learning_rate": 1.0711111111111111e-07, "logits/chosen": 0.20627442002296448, "logits/rejected": 0.24067382514476776, "logps/chosen": -263.79998779296875, "logps/rejected": -406.0, "loss": 0.0009, "nll_loss": 0.910937488079071, "rewards/accuracies": 1.0, "rewards/chosen": 19.4375, "rewards/margins": 21.524999618530273, "rewards/rejected": -2.0770020484924316, "step": 1010 }, { "epoch": 0.816, "grad_norm": 0.008101550660192351, "learning_rate": 1.0266666666666666e-07, "logits/chosen": 0.13178710639476776, "logits/rejected": 0.11772461235523224, "logps/chosen": -241.75, "logps/rejected": -394.20001220703125, "loss": 0.0009, "nll_loss": 0.893359363079071, "rewards/accuracies": 1.0, "rewards/chosen": 20.100000381469727, "rewards/margins": 20.950000762939453, "rewards/rejected": -0.851611316204071, "step": 1020 }, { "epoch": 0.824, "grad_norm": 0.020212312188331298, "learning_rate": 9.822222222222222e-08, "logits/chosen": 0.03201904147863388, "logits/rejected": 0.09085693210363388, "logps/chosen": -283.8999938964844, "logps/rejected": -410.79998779296875, "loss": 0.001, "nll_loss": 0.98828125, "rewards/accuracies": 1.0, "rewards/chosen": 20.662500381469727, "rewards/margins": 22.450000762939453, "rewards/rejected": -1.788671851158142, "step": 1030 }, { "epoch": 0.832, "grad_norm": 0.01018779281956963, "learning_rate": 9.377777777777778e-08, "logits/chosen": 0.005297851748764515, "logits/rejected": 0.03730468824505806, "logps/chosen": -293.1000061035156, "logps/rejected": -418.20001220703125, "loss": 0.0391, "nll_loss": 1.017578125, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 20.100000381469727, "rewards/margins": 21.225000381469727, "rewards/rejected": -1.11474609375, "step": 1040 }, { "epoch": 0.84, "grad_norm": 0.02497328379034309, "learning_rate": 8.933333333333333e-08, "logits/chosen": 0.03041992150247097, "logits/rejected": 0.05394287034869194, "logps/chosen": -302.5, "logps/rejected": -412.79998779296875, "loss": 0.0011, "nll_loss": 1.077734351158142, "rewards/accuracies": 1.0, "rewards/chosen": 20.100000381469727, "rewards/margins": 21.3125, "rewards/rejected": -1.21630859375, "step": 1050 }, { "epoch": 0.848, "grad_norm": 0.012861381907556958, "learning_rate": 8.488888888888889e-08, "logits/chosen": 0.03743896633386612, "logits/rejected": 0.131591796875, "logps/chosen": -272.79998779296875, "logps/rejected": -407.20001220703125, "loss": 0.0009, "nll_loss": 0.9085937738418579, "rewards/accuracies": 1.0, "rewards/chosen": 20.412500381469727, "rewards/margins": 21.924999237060547, "rewards/rejected": -1.5066406726837158, "step": 1060 }, { "epoch": 0.856, "grad_norm": 0.009207668604585216, "learning_rate": 8.044444444444445e-08, "logits/chosen": 0.04277343675494194, "logits/rejected": 0.11362304538488388, "logps/chosen": -288.20001220703125, "logps/rejected": -404.6000061035156, "loss": 0.0011, "nll_loss": 1.010156273841858, "rewards/accuracies": 1.0, "rewards/chosen": 19.450000762939453, "rewards/margins": 20.737499237060547, "rewards/rejected": -1.2658202648162842, "step": 1070 }, { "epoch": 0.864, "grad_norm": 0.014789555122865826, "learning_rate": 7.599999999999999e-08, "logits/chosen": -0.03659667819738388, "logits/rejected": 0.09859619289636612, "logps/chosen": -269.79998779296875, "logps/rejected": -416.20001220703125, "loss": 0.001, "nll_loss": 1.0222656726837158, "rewards/accuracies": 1.0, "rewards/chosen": 19.387500762939453, "rewards/margins": 21.487499237060547, "rewards/rejected": -2.081835985183716, "step": 1080 }, { "epoch": 0.872, "grad_norm": 0.01967144963219391, "learning_rate": 7.155555555555555e-08, "logits/chosen": 0.050048828125, "logits/rejected": 0.12807616591453552, "logps/chosen": -283.79998779296875, "logps/rejected": -398.3999938964844, "loss": 0.001, "nll_loss": 0.9683593511581421, "rewards/accuracies": 1.0, "rewards/chosen": 19.975000381469727, "rewards/margins": 21.174999237060547, "rewards/rejected": -1.1923339366912842, "step": 1090 }, { "epoch": 0.88, "grad_norm": 0.01262612387400558, "learning_rate": 6.71111111111111e-08, "logits/chosen": -0.08295898139476776, "logits/rejected": 0.06423339992761612, "logps/chosen": -340.5, "logps/rejected": -409.3999938964844, "loss": 0.001, "nll_loss": 1.01171875, "rewards/accuracies": 1.0, "rewards/chosen": 19.274999618530273, "rewards/margins": 21.024999618530273, "rewards/rejected": -1.7332031726837158, "step": 1100 }, { "epoch": 0.888, "grad_norm": 0.013672600008616023, "learning_rate": 6.266666666666666e-08, "logits/chosen": 0.16115722060203552, "logits/rejected": 0.17141112685203552, "logps/chosen": -246.89999389648438, "logps/rejected": -415.20001220703125, "loss": 0.0009, "nll_loss": 0.9058593511581421, "rewards/accuracies": 1.0, "rewards/chosen": 19.475000381469727, "rewards/margins": 21.612499237060547, "rewards/rejected": -2.133984327316284, "step": 1110 }, { "epoch": 0.896, "grad_norm": 0.01751993042439502, "learning_rate": 5.822222222222222e-08, "logits/chosen": 0.06224365159869194, "logits/rejected": 0.15957030653953552, "logps/chosen": -226.8000030517578, "logps/rejected": -421.79998779296875, "loss": 0.001, "nll_loss": 0.981249988079071, "rewards/accuracies": 1.0, "rewards/chosen": 21.274999618530273, "rewards/margins": 22.875, "rewards/rejected": -1.5966796875, "step": 1120 }, { "epoch": 0.904, "grad_norm": 0.014517125852295223, "learning_rate": 5.377777777777778e-08, "logits/chosen": -0.0188446044921875, "logits/rejected": 0.1414794921875, "logps/chosen": -283.5, "logps/rejected": -416.20001220703125, "loss": 0.001, "nll_loss": 0.995312511920929, "rewards/accuracies": 1.0, "rewards/chosen": 19.987499237060547, "rewards/margins": 22.587499618530273, "rewards/rejected": -2.6039061546325684, "step": 1130 }, { "epoch": 0.912, "grad_norm": 0.02080473594604604, "learning_rate": 4.933333333333333e-08, "logits/chosen": 0.041839599609375, "logits/rejected": 0.14179687201976776, "logps/chosen": -338.70001220703125, "logps/rejected": -391.3999938964844, "loss": 0.0009, "nll_loss": 0.916796863079071, "rewards/accuracies": 1.0, "rewards/chosen": 19.575000762939453, "rewards/margins": 21.100000381469727, "rewards/rejected": -1.5293457508087158, "step": 1140 }, { "epoch": 0.92, "grad_norm": 0.020719630349212197, "learning_rate": 4.4888888888888885e-08, "logits/chosen": 0.04411621019244194, "logits/rejected": 0.07084961235523224, "logps/chosen": -269.3999938964844, "logps/rejected": -404.79998779296875, "loss": 0.001, "nll_loss": 0.961718738079071, "rewards/accuracies": 1.0, "rewards/chosen": 19.725000381469727, "rewards/margins": 21.475000381469727, "rewards/rejected": -1.7581055164337158, "step": 1150 }, { "epoch": 0.928, "grad_norm": 0.008502519454361718, "learning_rate": 4.044444444444444e-08, "logits/chosen": -0.06318359076976776, "logits/rejected": 0.095306396484375, "logps/chosen": -291.3999938964844, "logps/rejected": -408.0, "loss": 0.0009, "nll_loss": 0.875781238079071, "rewards/accuracies": 1.0, "rewards/chosen": 20.287500381469727, "rewards/margins": 22.037500381469727, "rewards/rejected": -1.7532227039337158, "step": 1160 }, { "epoch": 0.936, "grad_norm": 0.010182005409104732, "learning_rate": 3.6e-08, "logits/chosen": -0.02708740159869194, "logits/rejected": 0.07539062201976776, "logps/chosen": -297.70001220703125, "logps/rejected": -411.3999938964844, "loss": 0.0011, "nll_loss": 1.0515625476837158, "rewards/accuracies": 1.0, "rewards/chosen": 19.549999237060547, "rewards/margins": 21.25, "rewards/rejected": -1.7091796398162842, "step": 1170 }, { "epoch": 0.944, "grad_norm": 0.014883019162230187, "learning_rate": 3.155555555555556e-08, "logits/chosen": -0.02933349646627903, "logits/rejected": 0.08122558891773224, "logps/chosen": -297.70001220703125, "logps/rejected": -410.0, "loss": 0.0017, "nll_loss": 0.9140625, "rewards/accuracies": 1.0, "rewards/chosen": 19.962499618530273, "rewards/margins": 21.512500762939453, "rewards/rejected": -1.5568358898162842, "step": 1180 }, { "epoch": 0.952, "grad_norm": 0.006673830289265544, "learning_rate": 2.7111111111111108e-08, "logits/chosen": -0.1202392578125, "logits/rejected": -0.01416015625, "logps/chosen": -267.1000061035156, "logps/rejected": -421.79998779296875, "loss": 0.0015, "nll_loss": 0.9007812738418579, "rewards/accuracies": 1.0, "rewards/chosen": 18.825000762939453, "rewards/margins": 21.350000381469727, "rewards/rejected": -2.530468702316284, "step": 1190 }, { "epoch": 0.96, "grad_norm": 0.016858387886967145, "learning_rate": 2.2666666666666668e-08, "logits/chosen": 0.02562255784869194, "logits/rejected": 0.12241210788488388, "logps/chosen": -303.20001220703125, "logps/rejected": -397.0, "loss": 0.001, "nll_loss": 1.024999976158142, "rewards/accuracies": 1.0, "rewards/chosen": 20.774999618530273, "rewards/margins": 22.137500762939453, "rewards/rejected": -1.382421851158142, "step": 1200 }, { "epoch": 0.968, "grad_norm": 0.014032183883427854, "learning_rate": 1.822222222222222e-08, "logits/chosen": 0.02207031287252903, "logits/rejected": 0.04111327975988388, "logps/chosen": -264.3999938964844, "logps/rejected": -418.20001220703125, "loss": 0.0009, "nll_loss": 0.9339843988418579, "rewards/accuracies": 1.0, "rewards/chosen": 19.412500381469727, "rewards/margins": 22.100000381469727, "rewards/rejected": -2.688281297683716, "step": 1210 }, { "epoch": 0.976, "grad_norm": 0.013396264787771736, "learning_rate": 1.3777777777777778e-08, "logits/chosen": 0.06943359225988388, "logits/rejected": 0.160491943359375, "logps/chosen": -273.25, "logps/rejected": -424.6000061035156, "loss": 0.001, "nll_loss": 0.955859363079071, "rewards/accuracies": 1.0, "rewards/chosen": 20.075000762939453, "rewards/margins": 22.850000381469727, "rewards/rejected": -2.7822265625, "step": 1220 }, { "epoch": 0.984, "grad_norm": 0.01902150518424847, "learning_rate": 9.333333333333334e-09, "logits/chosen": 0.12221679836511612, "logits/rejected": 0.22767333686351776, "logps/chosen": -282.20001220703125, "logps/rejected": -382.79998779296875, "loss": 0.0008, "nll_loss": 0.8285156488418579, "rewards/accuracies": 1.0, "rewards/chosen": 18.587499618530273, "rewards/margins": 19.037500381469727, "rewards/rejected": -0.44819337129592896, "step": 1230 }, { "epoch": 0.992, "grad_norm": 0.008528899989414147, "learning_rate": 4.888888888888888e-09, "logits/chosen": 0.05415039137005806, "logits/rejected": 0.09858398139476776, "logps/chosen": -258.70001220703125, "logps/rejected": -415.20001220703125, "loss": 0.0009, "nll_loss": 0.880859375, "rewards/accuracies": 1.0, "rewards/chosen": 20.700000762939453, "rewards/margins": 22.612499237060547, "rewards/rejected": -1.9093749523162842, "step": 1240 }, { "epoch": 1.0, "grad_norm": 0.012294810094826115, "learning_rate": 4.4444444444444443e-10, "logits/chosen": -0.02900390699505806, "logits/rejected": 0.1044921875, "logps/chosen": -266.1000061035156, "logps/rejected": -404.0, "loss": 0.0011, "nll_loss": 0.961718738079071, "rewards/accuracies": 1.0, "rewards/chosen": 20.8125, "rewards/margins": 21.975000381469727, "rewards/rejected": -1.18310546875, "step": 1250 }, { "epoch": 1.0, "eval_logits/chosen": -0.11485877633094788, "eval_logits/rejected": 0.04485614597797394, "eval_logps/chosen": -333.0769348144531, "eval_logps/rejected": -407.5384521484375, "eval_loss": 0.0016917419852688909, "eval_nll_loss": 0.98046875, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 18.94230842590332, "eval_rewards/margins": 20.269229888916016, "eval_rewards/rejected": -1.33984375, "eval_runtime": 8.6236, "eval_samples_per_second": 11.596, "eval_steps_per_second": 1.507, "step": 1250 }, { "epoch": 1.0, "step": 1250, "total_flos": 0.0, "train_loss": 0.0, "train_runtime": 0.984, "train_samples_per_second": 10161.128, "train_steps_per_second": 1270.268 } ], "logging_steps": 10, "max_steps": 1250, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }