|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0008, |
|
"grad_norm": 226.62270447693234, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.91796875, |
|
"logits/rejected": -1.0625, |
|
"logps/chosen": -448.0, |
|
"logps/rejected": -404.0, |
|
"loss": 0.6934, |
|
"nll_loss": 2.625, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 183.46648450042892, |
|
"learning_rate": 3.6e-08, |
|
"logits/chosen": -0.9357638955116272, |
|
"logits/rejected": -1.0073784589767456, |
|
"logps/chosen": -528.4444580078125, |
|
"logps/rejected": -375.77777099609375, |
|
"loss": 0.7075, |
|
"nll_loss": 1.7126736640930176, |
|
"rewards/accuracies": 0.3055555522441864, |
|
"rewards/chosen": -0.0180528424680233, |
|
"rewards/margins": -0.006869846023619175, |
|
"rewards/rejected": -0.011135525070130825, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 195.8050525947057, |
|
"learning_rate": 7.599999999999999e-08, |
|
"logits/chosen": -0.8521484136581421, |
|
"logits/rejected": -1.0, |
|
"logps/chosen": -412.79998779296875, |
|
"logps/rejected": -408.6000061035156, |
|
"loss": 0.6575, |
|
"nll_loss": 2.35546875, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.005053711123764515, |
|
"rewards/margins": 0.09255371242761612, |
|
"rewards/rejected": -0.09763183444738388, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.024, |
|
"grad_norm": 141.87495839689677, |
|
"learning_rate": 1.16e-07, |
|
"logits/chosen": -0.848437488079071, |
|
"logits/rejected": -0.8382812738418579, |
|
"logps/chosen": -521.5999755859375, |
|
"logps/rejected": -380.20001220703125, |
|
"loss": 0.5052, |
|
"nll_loss": 1.78515625, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.14396972954273224, |
|
"rewards/margins": 0.4627929627895355, |
|
"rewards/rejected": -0.31914061307907104, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 125.39420097312498, |
|
"learning_rate": 1.56e-07, |
|
"logits/chosen": -0.8394531011581421, |
|
"logits/rejected": -0.969921886920929, |
|
"logps/chosen": -529.0, |
|
"logps/rejected": -401.6000061035156, |
|
"loss": 0.3009, |
|
"nll_loss": 1.8390624523162842, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.3280273377895355, |
|
"rewards/margins": 1.265039086341858, |
|
"rewards/rejected": -0.9378906488418579, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 51.38963253406093, |
|
"learning_rate": 1.96e-07, |
|
"logits/chosen": -0.811328113079071, |
|
"logits/rejected": -0.971484363079071, |
|
"logps/chosen": -459.20001220703125, |
|
"logps/rejected": -411.6000061035156, |
|
"loss": 0.179, |
|
"nll_loss": 2.192187547683716, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 0.33916014432907104, |
|
"rewards/margins": 2.1109375953674316, |
|
"rewards/rejected": -1.771875023841858, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.048, |
|
"grad_norm": 41.54844453932047, |
|
"learning_rate": 2.3599999999999997e-07, |
|
"logits/chosen": -0.710888683795929, |
|
"logits/rejected": -0.852734386920929, |
|
"logps/chosen": -450.20001220703125, |
|
"logps/rejected": -423.3999938964844, |
|
"loss": 0.0706, |
|
"nll_loss": 2.2906250953674316, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0625, |
|
"rewards/margins": 3.278125047683716, |
|
"rewards/rejected": -3.340625047683716, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.056, |
|
"grad_norm": 2.8975377631106283, |
|
"learning_rate": 2.7600000000000004e-07, |
|
"logits/chosen": -0.756054699420929, |
|
"logits/rejected": -0.7701171636581421, |
|
"logps/chosen": -454.79998779296875, |
|
"logps/rejected": -425.79998779296875, |
|
"loss": 0.0485, |
|
"nll_loss": 2.09375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.03144531324505806, |
|
"rewards/margins": 4.420312404632568, |
|
"rewards/rejected": -4.453125, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 1.9819883761762331, |
|
"learning_rate": 3.1599999999999997e-07, |
|
"logits/chosen": -0.6548827886581421, |
|
"logits/rejected": -0.584765613079071, |
|
"logps/chosen": -516.2000122070312, |
|
"logps/rejected": -442.0, |
|
"loss": 0.0136, |
|
"nll_loss": 1.96875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05791015550494194, |
|
"rewards/margins": 5.881249904632568, |
|
"rewards/rejected": -5.818749904632568, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.072, |
|
"grad_norm": 40.33622312322741, |
|
"learning_rate": 3.5599999999999996e-07, |
|
"logits/chosen": -0.5289062261581421, |
|
"logits/rejected": -0.5241454839706421, |
|
"logps/chosen": -460.3999938964844, |
|
"logps/rejected": -450.3999938964844, |
|
"loss": 0.0126, |
|
"nll_loss": 2.077343702316284, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.30329591035842896, |
|
"rewards/margins": 6.993750095367432, |
|
"rewards/rejected": -6.690625190734863, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.381840804687941, |
|
"learning_rate": 3.96e-07, |
|
"logits/chosen": -0.6162109375, |
|
"logits/rejected": -0.5665038824081421, |
|
"logps/chosen": -516.5999755859375, |
|
"logps/rejected": -461.6000061035156, |
|
"loss": 0.0103, |
|
"nll_loss": 2.063281297683716, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.47880858182907104, |
|
"rewards/margins": 8.15625, |
|
"rewards/rejected": -7.675000190734863, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.088, |
|
"grad_norm": 0.022971788286646016, |
|
"learning_rate": 4.36e-07, |
|
"logits/chosen": -0.555468738079071, |
|
"logits/rejected": -0.558398425579071, |
|
"logps/chosen": -509.6000061035156, |
|
"logps/rejected": -469.3999938964844, |
|
"loss": 0.0069, |
|
"nll_loss": 1.841406226158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.911425769329071, |
|
"rewards/margins": 9.612500190734863, |
|
"rewards/rejected": -8.693750381469727, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 0.6089138686903683, |
|
"learning_rate": 4.76e-07, |
|
"logits/chosen": -0.43085938692092896, |
|
"logits/rejected": -0.44287109375, |
|
"logps/chosen": -460.3999938964844, |
|
"logps/rejected": -492.6000061035156, |
|
"loss": 0.0024, |
|
"nll_loss": 1.9765625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.300390601158142, |
|
"rewards/margins": 10.537500381469727, |
|
"rewards/rejected": -9.240625381469727, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.104, |
|
"grad_norm": 5.072145285053, |
|
"learning_rate": 4.982222222222223e-07, |
|
"logits/chosen": -0.39960938692092896, |
|
"logits/rejected": -0.3809570372104645, |
|
"logps/chosen": -470.6000061035156, |
|
"logps/rejected": -472.20001220703125, |
|
"loss": 0.0046, |
|
"nll_loss": 2.043750047683716, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.507031202316284, |
|
"rewards/margins": 10.509374618530273, |
|
"rewards/rejected": -8.009374618530273, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.112, |
|
"grad_norm": 0.6864472420728707, |
|
"learning_rate": 4.937777777777777e-07, |
|
"logits/chosen": -0.26445311307907104, |
|
"logits/rejected": -0.293212890625, |
|
"logps/chosen": -411.0, |
|
"logps/rejected": -460.79998779296875, |
|
"loss": 0.0023, |
|
"nll_loss": 1.904687523841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.862499952316284, |
|
"rewards/margins": 11.34375, |
|
"rewards/rejected": -7.487500190734863, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.04499814020001256, |
|
"learning_rate": 4.893333333333333e-07, |
|
"logits/chosen": -0.2533935606479645, |
|
"logits/rejected": -0.30218505859375, |
|
"logps/chosen": -407.20001220703125, |
|
"logps/rejected": -467.20001220703125, |
|
"loss": 0.003, |
|
"nll_loss": 1.5851562023162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.1015625, |
|
"rewards/margins": 12.21875, |
|
"rewards/rejected": -7.109375, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 0.12456933423635125, |
|
"learning_rate": 4.848888888888888e-07, |
|
"logits/chosen": -0.5003906488418579, |
|
"logits/rejected": -0.4310546815395355, |
|
"logps/chosen": -409.79998779296875, |
|
"logps/rejected": -465.6000061035156, |
|
"loss": 0.0118, |
|
"nll_loss": 1.7273437976837158, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 5.496874809265137, |
|
"rewards/margins": 12.262499809265137, |
|
"rewards/rejected": -6.771874904632568, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.136, |
|
"grad_norm": 0.10810136696878427, |
|
"learning_rate": 4.804444444444444e-07, |
|
"logits/chosen": -0.4216064512729645, |
|
"logits/rejected": -0.3792480528354645, |
|
"logps/chosen": -384.0, |
|
"logps/rejected": -446.3999938964844, |
|
"loss": 0.0022, |
|
"nll_loss": 1.6687500476837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.856249809265137, |
|
"rewards/margins": 12.15625, |
|
"rewards/rejected": -5.293749809265137, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.144, |
|
"grad_norm": 2.1017761962617016, |
|
"learning_rate": 4.76e-07, |
|
"logits/chosen": -0.3038574159145355, |
|
"logits/rejected": -0.32097166776657104, |
|
"logps/chosen": -426.20001220703125, |
|
"logps/rejected": -432.79998779296875, |
|
"loss": 0.0031, |
|
"nll_loss": 1.6843750476837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 8.0, |
|
"rewards/margins": 12.050000190734863, |
|
"rewards/rejected": -4.051562309265137, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.152, |
|
"grad_norm": 0.03472137716422411, |
|
"learning_rate": 4.7155555555555556e-07, |
|
"logits/chosen": -0.27385252714157104, |
|
"logits/rejected": -0.274169921875, |
|
"logps/chosen": -383.3999938964844, |
|
"logps/rejected": -433.0, |
|
"loss": 0.002, |
|
"nll_loss": 1.497656226158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.46875, |
|
"rewards/margins": 14.081250190734863, |
|
"rewards/rejected": -4.620312690734863, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.05628090345177454, |
|
"learning_rate": 4.6711111111111104e-07, |
|
"logits/chosen": -0.3796752989292145, |
|
"logits/rejected": -0.342529296875, |
|
"logps/chosen": -361.0, |
|
"logps/rejected": -430.0, |
|
"loss": 0.0017, |
|
"nll_loss": 1.515625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.209375381469727, |
|
"rewards/margins": 13.231249809265137, |
|
"rewards/rejected": -4.025781154632568, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.168, |
|
"grad_norm": 0.03469897920654829, |
|
"learning_rate": 4.6266666666666663e-07, |
|
"logits/chosen": -0.30029296875, |
|
"logits/rejected": -0.2890380918979645, |
|
"logps/chosen": -372.70001220703125, |
|
"logps/rejected": -418.20001220703125, |
|
"loss": 0.0019, |
|
"nll_loss": 1.4328124523162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.162500381469727, |
|
"rewards/margins": 13.068750381469727, |
|
"rewards/rejected": -2.887939453125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.176, |
|
"grad_norm": 0.03297401557091858, |
|
"learning_rate": 4.5822222222222216e-07, |
|
"logits/chosen": -0.4486328065395355, |
|
"logits/rejected": -0.3726562559604645, |
|
"logps/chosen": -398.6000061035156, |
|
"logps/rejected": -430.79998779296875, |
|
"loss": 0.0016, |
|
"nll_loss": 1.484375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.693750381469727, |
|
"rewards/margins": 14.068750381469727, |
|
"rewards/rejected": -3.3890624046325684, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.184, |
|
"grad_norm": 0.03928262686521911, |
|
"learning_rate": 4.5377777777777775e-07, |
|
"logits/chosen": -0.31196290254592896, |
|
"logits/rejected": -0.2955078184604645, |
|
"logps/chosen": -329.6000061035156, |
|
"logps/rejected": -432.6000061035156, |
|
"loss": 0.0013, |
|
"nll_loss": 1.2960937023162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.287500381469727, |
|
"rewards/margins": 15.443750381469727, |
|
"rewards/rejected": -4.154687404632568, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.192, |
|
"grad_norm": 0.012808641512081923, |
|
"learning_rate": 4.493333333333333e-07, |
|
"logits/chosen": -0.28032225370407104, |
|
"logits/rejected": -0.2730468809604645, |
|
"logps/chosen": -390.1000061035156, |
|
"logps/rejected": -418.20001220703125, |
|
"loss": 0.0017, |
|
"nll_loss": 1.353124976158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.774999618530273, |
|
"rewards/margins": 14.8125, |
|
"rewards/rejected": -3.0433592796325684, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.06025316567815392, |
|
"learning_rate": 4.4488888888888887e-07, |
|
"logits/chosen": -0.2566772401332855, |
|
"logits/rejected": -0.19821777939796448, |
|
"logps/chosen": -371.20001220703125, |
|
"logps/rejected": -412.6000061035156, |
|
"loss": 0.0014, |
|
"nll_loss": 1.3125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.987500190734863, |
|
"rewards/margins": 14.800000190734863, |
|
"rewards/rejected": -2.801953077316284, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.208, |
|
"grad_norm": 0.018239198980973682, |
|
"learning_rate": 4.4044444444444445e-07, |
|
"logits/chosen": -0.3375000059604645, |
|
"logits/rejected": -0.22773437201976776, |
|
"logps/chosen": -367.70001220703125, |
|
"logps/rejected": -419.79998779296875, |
|
"loss": 0.0016, |
|
"nll_loss": 1.3914062976837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.081250190734863, |
|
"rewards/margins": 15.800000190734863, |
|
"rewards/rejected": -2.72265625, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.216, |
|
"grad_norm": 0.06735412914324118, |
|
"learning_rate": 4.36e-07, |
|
"logits/chosen": -0.30003660917282104, |
|
"logits/rejected": -0.3238281309604645, |
|
"logps/chosen": -345.8999938964844, |
|
"logps/rejected": -427.0, |
|
"loss": 0.01, |
|
"nll_loss": 1.340234398841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.768750190734863, |
|
"rewards/margins": 15.125, |
|
"rewards/rejected": -3.364453077316284, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.224, |
|
"grad_norm": 0.01881116895313125, |
|
"learning_rate": 4.3155555555555557e-07, |
|
"logits/chosen": -0.29327392578125, |
|
"logits/rejected": -0.25737303495407104, |
|
"logps/chosen": -365.0, |
|
"logps/rejected": -431.3999938964844, |
|
"loss": 0.0036, |
|
"nll_loss": 1.2589843273162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.668749809265137, |
|
"rewards/margins": 16.03125, |
|
"rewards/rejected": -3.3645873069763184, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.232, |
|
"grad_norm": 0.033150944419182336, |
|
"learning_rate": 4.271111111111111e-07, |
|
"logits/chosen": -0.24541015923023224, |
|
"logits/rejected": -0.2640136778354645, |
|
"logps/chosen": -323.0, |
|
"logps/rejected": -439.0, |
|
"loss": 0.0014, |
|
"nll_loss": 1.3125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.925000190734863, |
|
"rewards/margins": 16.3125, |
|
"rewards/rejected": -3.383984327316284, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.07622395451753027, |
|
"learning_rate": 4.226666666666667e-07, |
|
"logits/chosen": -0.34736329317092896, |
|
"logits/rejected": -0.31829530000686646, |
|
"logps/chosen": -317.1000061035156, |
|
"logps/rejected": -420.0, |
|
"loss": 0.0014, |
|
"nll_loss": 1.361718773841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.631250381469727, |
|
"rewards/margins": 16.15625, |
|
"rewards/rejected": -2.510937452316284, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.248, |
|
"grad_norm": 0.018683331713432866, |
|
"learning_rate": 4.1822222222222217e-07, |
|
"logits/chosen": -0.3045898377895355, |
|
"logits/rejected": -0.24216309189796448, |
|
"logps/chosen": -341.8999938964844, |
|
"logps/rejected": -413.0, |
|
"loss": 0.002, |
|
"nll_loss": 1.350000023841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.699999809265137, |
|
"rewards/margins": 16.631250381469727, |
|
"rewards/rejected": -2.9205079078674316, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.256, |
|
"grad_norm": 0.023574936630402193, |
|
"learning_rate": 4.1377777777777776e-07, |
|
"logits/chosen": -0.21635742485523224, |
|
"logits/rejected": -0.23710937798023224, |
|
"logps/chosen": -434.3999938964844, |
|
"logps/rejected": -407.3999938964844, |
|
"loss": 0.0022, |
|
"nll_loss": 1.337499976158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.274999618530273, |
|
"rewards/margins": 15.524999618530273, |
|
"rewards/rejected": -2.2476563453674316, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.264, |
|
"grad_norm": 0.03038779144828818, |
|
"learning_rate": 4.093333333333333e-07, |
|
"logits/chosen": -0.1950538605451584, |
|
"logits/rejected": -0.179931640625, |
|
"logps/chosen": -350.8999938964844, |
|
"logps/rejected": -419.20001220703125, |
|
"loss": 0.0127, |
|
"nll_loss": 1.1906249523162842, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 13.899999618530273, |
|
"rewards/margins": 16.506250381469727, |
|
"rewards/rejected": -2.590625047683716, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.272, |
|
"grad_norm": 0.04436049431080709, |
|
"learning_rate": 4.048888888888889e-07, |
|
"logits/chosen": -0.24531249701976776, |
|
"logits/rejected": -0.2109375, |
|
"logps/chosen": -297.79998779296875, |
|
"logps/rejected": -413.6000061035156, |
|
"loss": 0.0016, |
|
"nll_loss": 1.203125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 14.46875, |
|
"rewards/margins": 16.325000762939453, |
|
"rewards/rejected": -1.864843726158142, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.017501398014684644, |
|
"learning_rate": 4.004444444444444e-07, |
|
"logits/chosen": -0.329833984375, |
|
"logits/rejected": -0.34770506620407104, |
|
"logps/chosen": -308.5, |
|
"logps/rejected": -421.6000061035156, |
|
"loss": 0.0012, |
|
"nll_loss": 1.205468773841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 14.143750190734863, |
|
"rewards/margins": 16.381250381469727, |
|
"rewards/rejected": -2.240673780441284, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.288, |
|
"grad_norm": 0.028368395543364983, |
|
"learning_rate": 3.96e-07, |
|
"logits/chosen": -0.19560547173023224, |
|
"logits/rejected": -0.0811767578125, |
|
"logps/chosen": -310.3999938964844, |
|
"logps/rejected": -401.6000061035156, |
|
"loss": 0.0013, |
|
"nll_loss": 1.228906273841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.112500190734863, |
|
"rewards/margins": 16.625, |
|
"rewards/rejected": -1.511328101158142, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.296, |
|
"grad_norm": 0.03826354878861224, |
|
"learning_rate": 3.9155555555555553e-07, |
|
"logits/chosen": -0.21774902939796448, |
|
"logits/rejected": -0.11984863132238388, |
|
"logps/chosen": -343.70001220703125, |
|
"logps/rejected": -396.79998779296875, |
|
"loss": 0.0015, |
|
"nll_loss": 1.181249976158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 14.868749618530273, |
|
"rewards/margins": 15.931249618530273, |
|
"rewards/rejected": -1.049218773841858, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.304, |
|
"grad_norm": 0.015874061291675685, |
|
"learning_rate": 3.871111111111111e-07, |
|
"logits/chosen": -0.1663818359375, |
|
"logits/rejected": -0.06330566108226776, |
|
"logps/chosen": -333.70001220703125, |
|
"logps/rejected": -423.0, |
|
"loss": 0.0013, |
|
"nll_loss": 1.303125023841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 14.887499809265137, |
|
"rewards/margins": 17.950000762939453, |
|
"rewards/rejected": -3.07421875, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.312, |
|
"grad_norm": 0.019028625638946934, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"logits/chosen": -0.13876953721046448, |
|
"logits/rejected": -0.10498046875, |
|
"logps/chosen": -383.70001220703125, |
|
"logps/rejected": -405.3999938964844, |
|
"loss": 0.0012, |
|
"nll_loss": 1.1964843273162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 14.25, |
|
"rewards/margins": 16.649999618530273, |
|
"rewards/rejected": -2.4046874046325684, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.019036097748468133, |
|
"learning_rate": 3.7822222222222224e-07, |
|
"logits/chosen": -0.3286499083042145, |
|
"logits/rejected": -0.28095704317092896, |
|
"logps/chosen": -329.79998779296875, |
|
"logps/rejected": -426.3999938964844, |
|
"loss": 0.0013, |
|
"nll_loss": 1.2902343273162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.049999237060547, |
|
"rewards/margins": 18.825000762939453, |
|
"rewards/rejected": -2.77069091796875, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.328, |
|
"grad_norm": 0.016730568465352296, |
|
"learning_rate": 3.7377777777777777e-07, |
|
"logits/chosen": -0.11003418266773224, |
|
"logits/rejected": -0.12824706733226776, |
|
"logps/chosen": -382.3999938964844, |
|
"logps/rejected": -416.0, |
|
"loss": 0.0026, |
|
"nll_loss": 1.211328148841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.206250190734863, |
|
"rewards/margins": 17.387500762939453, |
|
"rewards/rejected": -2.163281202316284, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.336, |
|
"grad_norm": 0.01850424460734669, |
|
"learning_rate": 3.693333333333333e-07, |
|
"logits/chosen": -0.16533203423023224, |
|
"logits/rejected": -0.1209716796875, |
|
"logps/chosen": -356.3999938964844, |
|
"logps/rejected": -413.79998779296875, |
|
"loss": 0.0012, |
|
"nll_loss": 1.1242187023162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.637499809265137, |
|
"rewards/margins": 17.71875, |
|
"rewards/rejected": -2.0859375, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.344, |
|
"grad_norm": 0.039652389470085536, |
|
"learning_rate": 3.6488888888888884e-07, |
|
"logits/chosen": -0.10117187350988388, |
|
"logits/rejected": -0.05829467624425888, |
|
"logps/chosen": -298.20001220703125, |
|
"logps/rejected": -420.6000061035156, |
|
"loss": 0.0012, |
|
"nll_loss": 1.1785156726837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.581250190734863, |
|
"rewards/margins": 18.375, |
|
"rewards/rejected": -2.7708983421325684, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.352, |
|
"grad_norm": 0.008419647898269315, |
|
"learning_rate": 3.604444444444444e-07, |
|
"logits/chosen": -0.24436035752296448, |
|
"logits/rejected": -0.1181640625, |
|
"logps/chosen": -296.29998779296875, |
|
"logps/rejected": -423.20001220703125, |
|
"loss": 0.0011, |
|
"nll_loss": 1.058984398841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.268749237060547, |
|
"rewards/margins": 18.568750381469727, |
|
"rewards/rejected": -2.2855467796325684, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.008531494272693656, |
|
"learning_rate": 3.5599999999999996e-07, |
|
"logits/chosen": -0.06640625, |
|
"logits/rejected": -0.0052490234375, |
|
"logps/chosen": -329.1000061035156, |
|
"logps/rejected": -404.6000061035156, |
|
"loss": 0.0016, |
|
"nll_loss": 1.16796875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.049999237060547, |
|
"rewards/margins": 18.012500762939453, |
|
"rewards/rejected": -1.9445312023162842, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.368, |
|
"grad_norm": 0.021562461095979032, |
|
"learning_rate": 3.5155555555555554e-07, |
|
"logits/chosen": -0.17312011122703552, |
|
"logits/rejected": -0.07587890326976776, |
|
"logps/chosen": -316.8999938964844, |
|
"logps/rejected": -412.79998779296875, |
|
"loss": 0.0012, |
|
"nll_loss": 1.1652343273162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.381250381469727, |
|
"rewards/margins": 17.799999237060547, |
|
"rewards/rejected": -2.4085936546325684, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.376, |
|
"grad_norm": 0.07020322384992712, |
|
"learning_rate": 3.471111111111111e-07, |
|
"logits/chosen": -0.14414063096046448, |
|
"logits/rejected": -0.12218017876148224, |
|
"logps/chosen": -299.70001220703125, |
|
"logps/rejected": -428.0, |
|
"loss": 0.0017, |
|
"nll_loss": 1.25, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.856249809265137, |
|
"rewards/margins": 18.456249237060547, |
|
"rewards/rejected": -2.5699219703674316, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.384, |
|
"grad_norm": 0.012596113248962468, |
|
"learning_rate": 3.4266666666666666e-07, |
|
"logits/chosen": -0.08417968451976776, |
|
"logits/rejected": -0.03115234337747097, |
|
"logps/chosen": -302.79998779296875, |
|
"logps/rejected": -405.0, |
|
"loss": 0.0012, |
|
"nll_loss": 1.0558593273162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.668749809265137, |
|
"rewards/margins": 17.862499237060547, |
|
"rewards/rejected": -2.1884765625, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.392, |
|
"grad_norm": 0.007929870216062266, |
|
"learning_rate": 3.382222222222222e-07, |
|
"logits/chosen": -0.10902099311351776, |
|
"logits/rejected": -0.11533202975988388, |
|
"logps/chosen": -397.20001220703125, |
|
"logps/rejected": -404.6000061035156, |
|
"loss": 0.0038, |
|
"nll_loss": 1.135156273841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.993749618530273, |
|
"rewards/margins": 18.862499237060547, |
|
"rewards/rejected": -2.874706983566284, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.008630639702085227, |
|
"learning_rate": 3.337777777777778e-07, |
|
"logits/chosen": -0.26744383573532104, |
|
"logits/rejected": -0.14018554985523224, |
|
"logps/chosen": -318.6000061035156, |
|
"logps/rejected": -434.20001220703125, |
|
"loss": 0.0012, |
|
"nll_loss": 1.166406273841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.037500381469727, |
|
"rewards/margins": 19.012500762939453, |
|
"rewards/rejected": -2.995312452316284, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.408, |
|
"grad_norm": 0.016229293972070177, |
|
"learning_rate": 3.293333333333333e-07, |
|
"logits/chosen": -0.204833984375, |
|
"logits/rejected": -0.13259276747703552, |
|
"logps/chosen": -322.0, |
|
"logps/rejected": -407.79998779296875, |
|
"loss": 0.0022, |
|
"nll_loss": 1.116796851158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.512499809265137, |
|
"rewards/margins": 18.28125, |
|
"rewards/rejected": -2.77734375, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.416, |
|
"grad_norm": 0.5999767196042933, |
|
"learning_rate": 3.248888888888889e-07, |
|
"logits/chosen": -0.171142578125, |
|
"logits/rejected": -0.15626220405101776, |
|
"logps/chosen": -320.6000061035156, |
|
"logps/rejected": -428.20001220703125, |
|
"loss": 0.0013, |
|
"nll_loss": 1.1687500476837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.162500381469727, |
|
"rewards/margins": 19.862499237060547, |
|
"rewards/rejected": -3.703125, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.424, |
|
"grad_norm": 0.30677660404469975, |
|
"learning_rate": 3.204444444444444e-07, |
|
"logits/chosen": -0.20744629204273224, |
|
"logits/rejected": -0.15806885063648224, |
|
"logps/chosen": -359.0, |
|
"logps/rejected": -433.20001220703125, |
|
"loss": 0.0017, |
|
"nll_loss": 1.25390625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.912500381469727, |
|
"rewards/margins": 19.4375, |
|
"rewards/rejected": -3.549999952316284, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.432, |
|
"grad_norm": 0.016384405120536898, |
|
"learning_rate": 3.1599999999999997e-07, |
|
"logits/chosen": -0.11655273288488388, |
|
"logits/rejected": -0.13643798232078552, |
|
"logps/chosen": -284.1000061035156, |
|
"logps/rejected": -408.6000061035156, |
|
"loss": 0.001, |
|
"nll_loss": 1.033593773841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.856250762939453, |
|
"rewards/margins": 19.043750762939453, |
|
"rewards/rejected": -2.197338819503784, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.02552403140799525, |
|
"learning_rate": 3.115555555555555e-07, |
|
"logits/chosen": -0.1761474609375, |
|
"logits/rejected": -0.08870239555835724, |
|
"logps/chosen": -308.8999938964844, |
|
"logps/rejected": -428.6000061035156, |
|
"loss": 0.0012, |
|
"nll_loss": 1.185156226158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.200000762939453, |
|
"rewards/margins": 19.887500762939453, |
|
"rewards/rejected": -2.680468797683716, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.448, |
|
"grad_norm": 0.012418272439184573, |
|
"learning_rate": 3.071111111111111e-07, |
|
"logits/chosen": -0.25361329317092896, |
|
"logits/rejected": -0.15983887016773224, |
|
"logps/chosen": -371.29998779296875, |
|
"logps/rejected": -401.3999938964844, |
|
"loss": 0.0012, |
|
"nll_loss": 1.234375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.643749237060547, |
|
"rewards/margins": 18.518749237060547, |
|
"rewards/rejected": -1.90234375, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.456, |
|
"grad_norm": 0.008584798479513097, |
|
"learning_rate": 3.026666666666666e-07, |
|
"logits/chosen": -0.10489501804113388, |
|
"logits/rejected": 0.01859130896627903, |
|
"logps/chosen": -305.3999938964844, |
|
"logps/rejected": -411.20001220703125, |
|
"loss": 0.0012, |
|
"nll_loss": 1.1484375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.762500762939453, |
|
"rewards/margins": 19.325000762939453, |
|
"rewards/rejected": -1.558447241783142, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.464, |
|
"grad_norm": 0.054564295459296766, |
|
"learning_rate": 2.982222222222222e-07, |
|
"logits/chosen": -0.12746581435203552, |
|
"logits/rejected": 0.02424316480755806, |
|
"logps/chosen": -309.5, |
|
"logps/rejected": -403.6000061035156, |
|
"loss": 0.0013, |
|
"nll_loss": 1.109765648841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.78125, |
|
"rewards/margins": 18.862499237060547, |
|
"rewards/rejected": -2.0740723609924316, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.472, |
|
"grad_norm": 0.021190418364761706, |
|
"learning_rate": 2.937777777777778e-07, |
|
"logits/chosen": -0.12922362983226776, |
|
"logits/rejected": -0.007800293155014515, |
|
"logps/chosen": -327.8999938964844, |
|
"logps/rejected": -372.6000061035156, |
|
"loss": 0.0012, |
|
"nll_loss": 1.1261718273162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.299999237060547, |
|
"rewards/margins": 17.212499618530273, |
|
"rewards/rejected": -0.9365234375, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.1273137285714855, |
|
"learning_rate": 2.8933333333333333e-07, |
|
"logits/chosen": -0.02729492262005806, |
|
"logits/rejected": 0.03876953199505806, |
|
"logps/chosen": -349.6000061035156, |
|
"logps/rejected": -420.6000061035156, |
|
"loss": 0.0013, |
|
"nll_loss": 1.221093773841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.512500762939453, |
|
"rewards/margins": 20.143749237060547, |
|
"rewards/rejected": -3.608593702316284, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.488, |
|
"grad_norm": 0.11206326927393837, |
|
"learning_rate": 2.848888888888889e-07, |
|
"logits/chosen": -0.05439453199505806, |
|
"logits/rejected": 0.04978637769818306, |
|
"logps/chosen": -288.8999938964844, |
|
"logps/rejected": -419.0, |
|
"loss": 0.0011, |
|
"nll_loss": 1.078125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.268749237060547, |
|
"rewards/margins": 18.625, |
|
"rewards/rejected": -2.3518919944763184, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.496, |
|
"grad_norm": 0.006737027469415457, |
|
"learning_rate": 2.8044444444444445e-07, |
|
"logits/chosen": -0.06257323920726776, |
|
"logits/rejected": -0.02427978441119194, |
|
"logps/chosen": -282.6000061035156, |
|
"logps/rejected": -422.20001220703125, |
|
"loss": 0.0011, |
|
"nll_loss": 1.094140648841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 16.34375, |
|
"rewards/margins": 19.587499618530273, |
|
"rewards/rejected": -3.24609375, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.504, |
|
"grad_norm": 0.007960696014384233, |
|
"learning_rate": 2.7600000000000004e-07, |
|
"logits/chosen": -0.26665037870407104, |
|
"logits/rejected": -0.16494140028953552, |
|
"logps/chosen": -271.0, |
|
"logps/rejected": -415.3999938964844, |
|
"loss": 0.0011, |
|
"nll_loss": 1.0574219226837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.25, |
|
"rewards/margins": 19.387500762939453, |
|
"rewards/rejected": -2.118847608566284, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.512, |
|
"grad_norm": 0.012795263557863883, |
|
"learning_rate": 2.715555555555555e-07, |
|
"logits/chosen": -0.03950195387005806, |
|
"logits/rejected": 0.03125, |
|
"logps/chosen": -325.8999938964844, |
|
"logps/rejected": -407.79998779296875, |
|
"loss": 0.0111, |
|
"nll_loss": 1.0187499523162842, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 17.393749237060547, |
|
"rewards/margins": 19.674999237060547, |
|
"rewards/rejected": -2.2586913108825684, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.01127355606086418, |
|
"learning_rate": 2.671111111111111e-07, |
|
"logits/chosen": -0.078369140625, |
|
"logits/rejected": -0.0323486328125, |
|
"logps/chosen": -289.0, |
|
"logps/rejected": -417.0, |
|
"loss": 0.001, |
|
"nll_loss": 1.0125000476837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.318750381469727, |
|
"rewards/margins": 19.424999237060547, |
|
"rewards/rejected": -2.0975098609924316, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.528, |
|
"grad_norm": 0.019808530454758836, |
|
"learning_rate": 2.6266666666666664e-07, |
|
"logits/chosen": -0.04782714694738388, |
|
"logits/rejected": 0.09812011569738388, |
|
"logps/chosen": -323.6000061035156, |
|
"logps/rejected": -409.6000061035156, |
|
"loss": 0.0011, |
|
"nll_loss": 1.1015625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.774999618530273, |
|
"rewards/margins": 20.549999237060547, |
|
"rewards/rejected": -2.7822265625, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.536, |
|
"grad_norm": 0.021732913992860835, |
|
"learning_rate": 2.582222222222222e-07, |
|
"logits/chosen": 0.01522216759622097, |
|
"logits/rejected": 0.11748047173023224, |
|
"logps/chosen": -285.20001220703125, |
|
"logps/rejected": -419.20001220703125, |
|
"loss": 0.0011, |
|
"nll_loss": 1.078125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.850000381469727, |
|
"rewards/margins": 19.487499237060547, |
|
"rewards/rejected": -1.6281249523162842, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.544, |
|
"grad_norm": 0.005724681559177148, |
|
"learning_rate": 2.5377777777777776e-07, |
|
"logits/chosen": 0.0006347656017169356, |
|
"logits/rejected": 0.07487793266773224, |
|
"logps/chosen": -316.0, |
|
"logps/rejected": -392.0, |
|
"loss": 0.0024, |
|
"nll_loss": 0.9683593511581421, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.106250762939453, |
|
"rewards/margins": 18.868749618530273, |
|
"rewards/rejected": -1.749609351158142, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.552, |
|
"grad_norm": 0.00968898581539461, |
|
"learning_rate": 2.493333333333333e-07, |
|
"logits/chosen": -0.03961181640625, |
|
"logits/rejected": 0.116943359375, |
|
"logps/chosen": -282.0, |
|
"logps/rejected": -428.20001220703125, |
|
"loss": 0.0011, |
|
"nll_loss": 1.0832030773162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.96875, |
|
"rewards/margins": 21.024999618530273, |
|
"rewards/rejected": -3.087207078933716, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.02206940002406898, |
|
"learning_rate": 2.448888888888889e-07, |
|
"logits/chosen": 0.05800781399011612, |
|
"logits/rejected": 0.04348144680261612, |
|
"logps/chosen": -305.8999938964844, |
|
"logps/rejected": -411.20001220703125, |
|
"loss": 0.0012, |
|
"nll_loss": 1.172265648841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.575000762939453, |
|
"rewards/margins": 19.975000381469727, |
|
"rewards/rejected": -2.412109375, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.568, |
|
"grad_norm": 0.01284076138380296, |
|
"learning_rate": 2.404444444444444e-07, |
|
"logits/chosen": -0.03164062649011612, |
|
"logits/rejected": 0.05482788011431694, |
|
"logps/chosen": -294.6000061035156, |
|
"logps/rejected": -401.6000061035156, |
|
"loss": 0.0011, |
|
"nll_loss": 1.074609398841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.200000762939453, |
|
"rewards/margins": 19.387500762939453, |
|
"rewards/rejected": -1.1970703601837158, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.576, |
|
"grad_norm": 0.007678737859862069, |
|
"learning_rate": 2.3599999999999997e-07, |
|
"logits/chosen": -0.03760986402630806, |
|
"logits/rejected": 0.05534667894244194, |
|
"logps/chosen": -310.20001220703125, |
|
"logps/rejected": -413.79998779296875, |
|
"loss": 0.0013, |
|
"nll_loss": 1.169531226158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.600000381469727, |
|
"rewards/margins": 20.212499618530273, |
|
"rewards/rejected": -2.596874952316284, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.584, |
|
"grad_norm": 0.020744736487327976, |
|
"learning_rate": 2.3155555555555553e-07, |
|
"logits/chosen": -0.02609863318502903, |
|
"logits/rejected": 0.0072021484375, |
|
"logps/chosen": -287.0, |
|
"logps/rejected": -421.79998779296875, |
|
"loss": 0.001, |
|
"nll_loss": 1.001953125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.375, |
|
"rewards/margins": 20.887500762939453, |
|
"rewards/rejected": -3.515942335128784, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.592, |
|
"grad_norm": 0.010296671008794668, |
|
"learning_rate": 2.2711111111111112e-07, |
|
"logits/chosen": -0.06264648586511612, |
|
"logits/rejected": -0.03441772609949112, |
|
"logps/chosen": -281.20001220703125, |
|
"logps/rejected": -420.79998779296875, |
|
"loss": 0.001, |
|
"nll_loss": 0.9945312738418579, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.787500381469727, |
|
"rewards/margins": 20.075000762939453, |
|
"rewards/rejected": -2.292163133621216, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.01984847155435085, |
|
"learning_rate": 2.2266666666666668e-07, |
|
"logits/chosen": -0.0015625000232830644, |
|
"logits/rejected": 0.05145263671875, |
|
"logps/chosen": -328.6000061035156, |
|
"logps/rejected": -402.20001220703125, |
|
"loss": 0.0013, |
|
"nll_loss": 0.98046875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.600000381469727, |
|
"rewards/margins": 19.737499237060547, |
|
"rewards/rejected": -2.1128907203674316, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.608, |
|
"grad_norm": 0.011414762899716836, |
|
"learning_rate": 2.1822222222222224e-07, |
|
"logits/chosen": 0.06040038913488388, |
|
"logits/rejected": -0.010241699405014515, |
|
"logps/chosen": -287.70001220703125, |
|
"logps/rejected": -402.79998779296875, |
|
"loss": 0.001, |
|
"nll_loss": 1.03125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.362499237060547, |
|
"rewards/margins": 19.549999237060547, |
|
"rewards/rejected": -2.1845703125, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.616, |
|
"grad_norm": 0.011751018637081315, |
|
"learning_rate": 2.1377777777777777e-07, |
|
"logits/chosen": -0.10244140774011612, |
|
"logits/rejected": -0.09792480617761612, |
|
"logps/chosen": -279.29998779296875, |
|
"logps/rejected": -444.20001220703125, |
|
"loss": 0.0011, |
|
"nll_loss": 1.045312523841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.112499237060547, |
|
"rewards/margins": 21.924999237060547, |
|
"rewards/rejected": -3.8414063453674316, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.624, |
|
"grad_norm": 0.010739426686833121, |
|
"learning_rate": 2.0933333333333333e-07, |
|
"logits/chosen": -0.06641845405101776, |
|
"logits/rejected": -0.03793945163488388, |
|
"logps/chosen": -283.6000061035156, |
|
"logps/rejected": -433.6000061035156, |
|
"loss": 0.001, |
|
"nll_loss": 1.0441405773162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.043750762939453, |
|
"rewards/margins": 22.350000381469727, |
|
"rewards/rejected": -4.30859375, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.632, |
|
"grad_norm": 0.008894669609579957, |
|
"learning_rate": 2.048888888888889e-07, |
|
"logits/chosen": -0.13934326171875, |
|
"logits/rejected": 0.03299560397863388, |
|
"logps/chosen": -269.0, |
|
"logps/rejected": -435.0, |
|
"loss": 0.0026, |
|
"nll_loss": 1.0304687023162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.850000381469727, |
|
"rewards/margins": 21.799999237060547, |
|
"rewards/rejected": -3.9339842796325684, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.00781796371528786, |
|
"learning_rate": 2.0044444444444445e-07, |
|
"logits/chosen": -0.136871337890625, |
|
"logits/rejected": -0.06427001953125, |
|
"logps/chosen": -301.79998779296875, |
|
"logps/rejected": -441.20001220703125, |
|
"loss": 0.0011, |
|
"nll_loss": 1.0535156726837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.856250762939453, |
|
"rewards/margins": 22.362499237060547, |
|
"rewards/rejected": -4.500781059265137, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.648, |
|
"grad_norm": 0.006941095936510498, |
|
"learning_rate": 1.96e-07, |
|
"logits/chosen": -0.06254883110523224, |
|
"logits/rejected": -0.03428955003619194, |
|
"logps/chosen": -317.79998779296875, |
|
"logps/rejected": -424.6000061035156, |
|
"loss": 0.001, |
|
"nll_loss": 1.033593773841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.875, |
|
"rewards/margins": 20.450000762939453, |
|
"rewards/rejected": -2.5609374046325684, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.656, |
|
"grad_norm": 0.01161314226877997, |
|
"learning_rate": 1.9155555555555554e-07, |
|
"logits/chosen": -0.06083984300494194, |
|
"logits/rejected": 0.07171630859375, |
|
"logps/chosen": -300.0, |
|
"logps/rejected": -426.3999938964844, |
|
"loss": 0.0011, |
|
"nll_loss": 1.101171851158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.8125, |
|
"rewards/margins": 22.337499618530273, |
|
"rewards/rejected": -3.5337891578674316, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.664, |
|
"grad_norm": 0.04491917017356749, |
|
"learning_rate": 1.871111111111111e-07, |
|
"logits/chosen": -0.02180175855755806, |
|
"logits/rejected": -0.0004760742303915322, |
|
"logps/chosen": -272.1000061035156, |
|
"logps/rejected": -424.20001220703125, |
|
"loss": 0.001, |
|
"nll_loss": 1.0390625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.962499618530273, |
|
"rewards/margins": 22.612499237060547, |
|
"rewards/rejected": -3.6480469703674316, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.672, |
|
"grad_norm": 0.014327809455407186, |
|
"learning_rate": 1.8266666666666666e-07, |
|
"logits/chosen": -0.06074218824505806, |
|
"logits/rejected": -0.0008728027460165322, |
|
"logps/chosen": -304.8999938964844, |
|
"logps/rejected": -395.3999938964844, |
|
"loss": 0.0016, |
|
"nll_loss": 1.0378906726837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.612499237060547, |
|
"rewards/margins": 19.75, |
|
"rewards/rejected": -1.1486327648162842, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.010011390264830125, |
|
"learning_rate": 1.7822222222222222e-07, |
|
"logits/chosen": -0.143798828125, |
|
"logits/rejected": -0.04060058668255806, |
|
"logps/chosen": -308.1000061035156, |
|
"logps/rejected": -433.6000061035156, |
|
"loss": 0.0031, |
|
"nll_loss": 1.0398437976837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.887500762939453, |
|
"rewards/margins": 21.774999618530273, |
|
"rewards/rejected": -2.8993163108825684, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.688, |
|
"grad_norm": 0.006838199221939372, |
|
"learning_rate": 1.7377777777777778e-07, |
|
"logits/chosen": 0.041259765625, |
|
"logits/rejected": 0.13017578423023224, |
|
"logps/chosen": -293.0, |
|
"logps/rejected": -432.0, |
|
"loss": 0.001, |
|
"nll_loss": 1.0363280773162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.375, |
|
"rewards/margins": 21.825000762939453, |
|
"rewards/rejected": -3.4496092796325684, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.696, |
|
"grad_norm": 0.011644152757164477, |
|
"learning_rate": 1.6933333333333334e-07, |
|
"logits/chosen": 0.08931884914636612, |
|
"logits/rejected": 0.07216797024011612, |
|
"logps/chosen": -299.3999938964844, |
|
"logps/rejected": -411.0, |
|
"loss": 0.001, |
|
"nll_loss": 0.9429687261581421, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.487499237060547, |
|
"rewards/margins": 21.487499237060547, |
|
"rewards/rejected": -2.010546922683716, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.704, |
|
"grad_norm": 0.01033541957175912, |
|
"learning_rate": 1.6488888888888887e-07, |
|
"logits/chosen": 0.09145507961511612, |
|
"logits/rejected": 0.15330810844898224, |
|
"logps/chosen": -304.79998779296875, |
|
"logps/rejected": -400.20001220703125, |
|
"loss": 0.0009, |
|
"nll_loss": 0.8675781488418579, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.4375, |
|
"rewards/margins": 20.612499237060547, |
|
"rewards/rejected": -2.170703172683716, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.712, |
|
"grad_norm": 0.008717665163684966, |
|
"learning_rate": 1.6044444444444443e-07, |
|
"logits/chosen": 0.10676269233226776, |
|
"logits/rejected": 0.12166748195886612, |
|
"logps/chosen": -274.5, |
|
"logps/rejected": -426.6000061035156, |
|
"loss": 0.001, |
|
"nll_loss": 0.9921875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.149999618530273, |
|
"rewards/margins": 22.774999618530273, |
|
"rewards/rejected": -3.625, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.007924558579110246, |
|
"learning_rate": 1.56e-07, |
|
"logits/chosen": -0.11391601711511612, |
|
"logits/rejected": -0.02934570237994194, |
|
"logps/chosen": -266.8999938964844, |
|
"logps/rejected": -427.20001220703125, |
|
"loss": 0.0035, |
|
"nll_loss": 0.981249988079071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.662500381469727, |
|
"rewards/margins": 21.200000762939453, |
|
"rewards/rejected": -2.541015625, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.728, |
|
"grad_norm": 0.07578643720533651, |
|
"learning_rate": 1.5155555555555555e-07, |
|
"logits/chosen": 0.07387695461511612, |
|
"logits/rejected": 0.15861816704273224, |
|
"logps/chosen": -239.5500030517578, |
|
"logps/rejected": -417.3999938964844, |
|
"loss": 0.0011, |
|
"nll_loss": 1.0226562023162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.8125, |
|
"rewards/margins": 21.987499237060547, |
|
"rewards/rejected": -3.1800780296325684, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.736, |
|
"grad_norm": 0.009494584129256885, |
|
"learning_rate": 1.4711111111111111e-07, |
|
"logits/chosen": -0.004748535342514515, |
|
"logits/rejected": 0.05303344875574112, |
|
"logps/chosen": -327.79998779296875, |
|
"logps/rejected": -390.79998779296875, |
|
"loss": 0.001, |
|
"nll_loss": 1.0050780773162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.056249618530273, |
|
"rewards/margins": 20.575000762939453, |
|
"rewards/rejected": -1.507421851158142, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.744, |
|
"grad_norm": 0.017667443139199403, |
|
"learning_rate": 1.4266666666666665e-07, |
|
"logits/chosen": -0.054931640625, |
|
"logits/rejected": 0.02968749962747097, |
|
"logps/chosen": -261.0, |
|
"logps/rejected": -418.0, |
|
"loss": 0.001, |
|
"nll_loss": 0.9671875238418579, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.137500762939453, |
|
"rewards/margins": 21.331249237060547, |
|
"rewards/rejected": -2.1927733421325684, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.752, |
|
"grad_norm": 0.010899226100416153, |
|
"learning_rate": 1.382222222222222e-07, |
|
"logits/chosen": -0.0184326171875, |
|
"logits/rejected": 0.11514892429113388, |
|
"logps/chosen": -288.6000061035156, |
|
"logps/rejected": -408.3999938964844, |
|
"loss": 0.0465, |
|
"nll_loss": 1.074609398841858, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 19.024999618530273, |
|
"rewards/margins": 21.262500762939453, |
|
"rewards/rejected": -2.2400145530700684, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.025779260274360533, |
|
"learning_rate": 1.3377777777777777e-07, |
|
"logits/chosen": 0.02338867262005806, |
|
"logits/rejected": 0.10743407905101776, |
|
"logps/chosen": -330.6000061035156, |
|
"logps/rejected": -398.20001220703125, |
|
"loss": 0.001, |
|
"nll_loss": 1.0070312023162842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.756250381469727, |
|
"rewards/margins": 20.049999237060547, |
|
"rewards/rejected": -1.2770264148712158, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.768, |
|
"grad_norm": 0.025413812683072193, |
|
"learning_rate": 1.2933333333333333e-07, |
|
"logits/chosen": 0.05156249925494194, |
|
"logits/rejected": 0.17954102158546448, |
|
"logps/chosen": -291.3999938964844, |
|
"logps/rejected": -413.0, |
|
"loss": 0.001, |
|
"nll_loss": 0.958984375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.924999237060547, |
|
"rewards/margins": 21.637500762939453, |
|
"rewards/rejected": -1.715429663658142, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.776, |
|
"grad_norm": 0.008806349125691143, |
|
"learning_rate": 1.2488888888888889e-07, |
|
"logits/chosen": 0.02752685546875, |
|
"logits/rejected": 0.08073730766773224, |
|
"logps/chosen": -264.6000061035156, |
|
"logps/rejected": -407.6000061035156, |
|
"loss": 0.0009, |
|
"nll_loss": 0.8984375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.0, |
|
"rewards/margins": 20.237499237060547, |
|
"rewards/rejected": -1.255761742591858, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.784, |
|
"grad_norm": 0.0098690579475145, |
|
"learning_rate": 1.2044444444444445e-07, |
|
"logits/chosen": 0.01823730394244194, |
|
"logits/rejected": 0.11467285454273224, |
|
"logps/chosen": -310.20001220703125, |
|
"logps/rejected": -413.6000061035156, |
|
"loss": 0.001, |
|
"nll_loss": 0.9722656011581421, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.237499237060547, |
|
"rewards/margins": 22.325000762939453, |
|
"rewards/rejected": -2.0926756858825684, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.792, |
|
"grad_norm": 0.010627117006645067, |
|
"learning_rate": 1.16e-07, |
|
"logits/chosen": -0.07388915866613388, |
|
"logits/rejected": 0.07421875, |
|
"logps/chosen": -342.8999938964844, |
|
"logps/rejected": -412.79998779296875, |
|
"loss": 0.001, |
|
"nll_loss": 1.040624976158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.96875, |
|
"rewards/margins": 21.049999237060547, |
|
"rewards/rejected": -2.0562500953674316, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.04218956204057828, |
|
"learning_rate": 1.1155555555555555e-07, |
|
"logits/chosen": -0.0010253905784338713, |
|
"logits/rejected": 0.06143798679113388, |
|
"logps/chosen": -290.70001220703125, |
|
"logps/rejected": -412.6000061035156, |
|
"loss": 0.0011, |
|
"nll_loss": 1.09375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.962499618530273, |
|
"rewards/margins": 22.0, |
|
"rewards/rejected": -2.0201172828674316, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.808, |
|
"grad_norm": 0.014065236545009229, |
|
"learning_rate": 1.0711111111111111e-07, |
|
"logits/chosen": 0.20627442002296448, |
|
"logits/rejected": 0.24067382514476776, |
|
"logps/chosen": -263.79998779296875, |
|
"logps/rejected": -406.0, |
|
"loss": 0.0009, |
|
"nll_loss": 0.910937488079071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.4375, |
|
"rewards/margins": 21.524999618530273, |
|
"rewards/rejected": -2.0770020484924316, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.816, |
|
"grad_norm": 0.008101550660192351, |
|
"learning_rate": 1.0266666666666666e-07, |
|
"logits/chosen": 0.13178710639476776, |
|
"logits/rejected": 0.11772461235523224, |
|
"logps/chosen": -241.75, |
|
"logps/rejected": -394.20001220703125, |
|
"loss": 0.0009, |
|
"nll_loss": 0.893359363079071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.100000381469727, |
|
"rewards/margins": 20.950000762939453, |
|
"rewards/rejected": -0.851611316204071, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.824, |
|
"grad_norm": 0.020212312188331298, |
|
"learning_rate": 9.822222222222222e-08, |
|
"logits/chosen": 0.03201904147863388, |
|
"logits/rejected": 0.09085693210363388, |
|
"logps/chosen": -283.8999938964844, |
|
"logps/rejected": -410.79998779296875, |
|
"loss": 0.001, |
|
"nll_loss": 0.98828125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.662500381469727, |
|
"rewards/margins": 22.450000762939453, |
|
"rewards/rejected": -1.788671851158142, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.832, |
|
"grad_norm": 0.01018779281956963, |
|
"learning_rate": 9.377777777777778e-08, |
|
"logits/chosen": 0.005297851748764515, |
|
"logits/rejected": 0.03730468824505806, |
|
"logps/chosen": -293.1000061035156, |
|
"logps/rejected": -418.20001220703125, |
|
"loss": 0.0391, |
|
"nll_loss": 1.017578125, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 20.100000381469727, |
|
"rewards/margins": 21.225000381469727, |
|
"rewards/rejected": -1.11474609375, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.02497328379034309, |
|
"learning_rate": 8.933333333333333e-08, |
|
"logits/chosen": 0.03041992150247097, |
|
"logits/rejected": 0.05394287034869194, |
|
"logps/chosen": -302.5, |
|
"logps/rejected": -412.79998779296875, |
|
"loss": 0.0011, |
|
"nll_loss": 1.077734351158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.100000381469727, |
|
"rewards/margins": 21.3125, |
|
"rewards/rejected": -1.21630859375, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.848, |
|
"grad_norm": 0.012861381907556958, |
|
"learning_rate": 8.488888888888889e-08, |
|
"logits/chosen": 0.03743896633386612, |
|
"logits/rejected": 0.131591796875, |
|
"logps/chosen": -272.79998779296875, |
|
"logps/rejected": -407.20001220703125, |
|
"loss": 0.0009, |
|
"nll_loss": 0.9085937738418579, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.412500381469727, |
|
"rewards/margins": 21.924999237060547, |
|
"rewards/rejected": -1.5066406726837158, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.856, |
|
"grad_norm": 0.009207668604585216, |
|
"learning_rate": 8.044444444444445e-08, |
|
"logits/chosen": 0.04277343675494194, |
|
"logits/rejected": 0.11362304538488388, |
|
"logps/chosen": -288.20001220703125, |
|
"logps/rejected": -404.6000061035156, |
|
"loss": 0.0011, |
|
"nll_loss": 1.010156273841858, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.450000762939453, |
|
"rewards/margins": 20.737499237060547, |
|
"rewards/rejected": -1.2658202648162842, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.864, |
|
"grad_norm": 0.014789555122865826, |
|
"learning_rate": 7.599999999999999e-08, |
|
"logits/chosen": -0.03659667819738388, |
|
"logits/rejected": 0.09859619289636612, |
|
"logps/chosen": -269.79998779296875, |
|
"logps/rejected": -416.20001220703125, |
|
"loss": 0.001, |
|
"nll_loss": 1.0222656726837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.387500762939453, |
|
"rewards/margins": 21.487499237060547, |
|
"rewards/rejected": -2.081835985183716, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.872, |
|
"grad_norm": 0.01967144963219391, |
|
"learning_rate": 7.155555555555555e-08, |
|
"logits/chosen": 0.050048828125, |
|
"logits/rejected": 0.12807616591453552, |
|
"logps/chosen": -283.79998779296875, |
|
"logps/rejected": -398.3999938964844, |
|
"loss": 0.001, |
|
"nll_loss": 0.9683593511581421, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.975000381469727, |
|
"rewards/margins": 21.174999237060547, |
|
"rewards/rejected": -1.1923339366912842, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.01262612387400558, |
|
"learning_rate": 6.71111111111111e-08, |
|
"logits/chosen": -0.08295898139476776, |
|
"logits/rejected": 0.06423339992761612, |
|
"logps/chosen": -340.5, |
|
"logps/rejected": -409.3999938964844, |
|
"loss": 0.001, |
|
"nll_loss": 1.01171875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.274999618530273, |
|
"rewards/margins": 21.024999618530273, |
|
"rewards/rejected": -1.7332031726837158, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.888, |
|
"grad_norm": 0.013672600008616023, |
|
"learning_rate": 6.266666666666666e-08, |
|
"logits/chosen": 0.16115722060203552, |
|
"logits/rejected": 0.17141112685203552, |
|
"logps/chosen": -246.89999389648438, |
|
"logps/rejected": -415.20001220703125, |
|
"loss": 0.0009, |
|
"nll_loss": 0.9058593511581421, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.475000381469727, |
|
"rewards/margins": 21.612499237060547, |
|
"rewards/rejected": -2.133984327316284, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 0.01751993042439502, |
|
"learning_rate": 5.822222222222222e-08, |
|
"logits/chosen": 0.06224365159869194, |
|
"logits/rejected": 0.15957030653953552, |
|
"logps/chosen": -226.8000030517578, |
|
"logps/rejected": -421.79998779296875, |
|
"loss": 0.001, |
|
"nll_loss": 0.981249988079071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 21.274999618530273, |
|
"rewards/margins": 22.875, |
|
"rewards/rejected": -1.5966796875, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.904, |
|
"grad_norm": 0.014517125852295223, |
|
"learning_rate": 5.377777777777778e-08, |
|
"logits/chosen": -0.0188446044921875, |
|
"logits/rejected": 0.1414794921875, |
|
"logps/chosen": -283.5, |
|
"logps/rejected": -416.20001220703125, |
|
"loss": 0.001, |
|
"nll_loss": 0.995312511920929, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.987499237060547, |
|
"rewards/margins": 22.587499618530273, |
|
"rewards/rejected": -2.6039061546325684, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.912, |
|
"grad_norm": 0.02080473594604604, |
|
"learning_rate": 4.933333333333333e-08, |
|
"logits/chosen": 0.041839599609375, |
|
"logits/rejected": 0.14179687201976776, |
|
"logps/chosen": -338.70001220703125, |
|
"logps/rejected": -391.3999938964844, |
|
"loss": 0.0009, |
|
"nll_loss": 0.916796863079071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.575000762939453, |
|
"rewards/margins": 21.100000381469727, |
|
"rewards/rejected": -1.5293457508087158, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.020719630349212197, |
|
"learning_rate": 4.4888888888888885e-08, |
|
"logits/chosen": 0.04411621019244194, |
|
"logits/rejected": 0.07084961235523224, |
|
"logps/chosen": -269.3999938964844, |
|
"logps/rejected": -404.79998779296875, |
|
"loss": 0.001, |
|
"nll_loss": 0.961718738079071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.725000381469727, |
|
"rewards/margins": 21.475000381469727, |
|
"rewards/rejected": -1.7581055164337158, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.928, |
|
"grad_norm": 0.008502519454361718, |
|
"learning_rate": 4.044444444444444e-08, |
|
"logits/chosen": -0.06318359076976776, |
|
"logits/rejected": 0.095306396484375, |
|
"logps/chosen": -291.3999938964844, |
|
"logps/rejected": -408.0, |
|
"loss": 0.0009, |
|
"nll_loss": 0.875781238079071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.287500381469727, |
|
"rewards/margins": 22.037500381469727, |
|
"rewards/rejected": -1.7532227039337158, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.936, |
|
"grad_norm": 0.010182005409104732, |
|
"learning_rate": 3.6e-08, |
|
"logits/chosen": -0.02708740159869194, |
|
"logits/rejected": 0.07539062201976776, |
|
"logps/chosen": -297.70001220703125, |
|
"logps/rejected": -411.3999938964844, |
|
"loss": 0.0011, |
|
"nll_loss": 1.0515625476837158, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.549999237060547, |
|
"rewards/margins": 21.25, |
|
"rewards/rejected": -1.7091796398162842, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.944, |
|
"grad_norm": 0.014883019162230187, |
|
"learning_rate": 3.155555555555556e-08, |
|
"logits/chosen": -0.02933349646627903, |
|
"logits/rejected": 0.08122558891773224, |
|
"logps/chosen": -297.70001220703125, |
|
"logps/rejected": -410.0, |
|
"loss": 0.0017, |
|
"nll_loss": 0.9140625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.962499618530273, |
|
"rewards/margins": 21.512500762939453, |
|
"rewards/rejected": -1.5568358898162842, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.952, |
|
"grad_norm": 0.006673830289265544, |
|
"learning_rate": 2.7111111111111108e-08, |
|
"logits/chosen": -0.1202392578125, |
|
"logits/rejected": -0.01416015625, |
|
"logps/chosen": -267.1000061035156, |
|
"logps/rejected": -421.79998779296875, |
|
"loss": 0.0015, |
|
"nll_loss": 0.9007812738418579, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.825000762939453, |
|
"rewards/margins": 21.350000381469727, |
|
"rewards/rejected": -2.530468702316284, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.016858387886967145, |
|
"learning_rate": 2.2666666666666668e-08, |
|
"logits/chosen": 0.02562255784869194, |
|
"logits/rejected": 0.12241210788488388, |
|
"logps/chosen": -303.20001220703125, |
|
"logps/rejected": -397.0, |
|
"loss": 0.001, |
|
"nll_loss": 1.024999976158142, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.774999618530273, |
|
"rewards/margins": 22.137500762939453, |
|
"rewards/rejected": -1.382421851158142, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.968, |
|
"grad_norm": 0.014032183883427854, |
|
"learning_rate": 1.822222222222222e-08, |
|
"logits/chosen": 0.02207031287252903, |
|
"logits/rejected": 0.04111327975988388, |
|
"logps/chosen": -264.3999938964844, |
|
"logps/rejected": -418.20001220703125, |
|
"loss": 0.0009, |
|
"nll_loss": 0.9339843988418579, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 19.412500381469727, |
|
"rewards/margins": 22.100000381469727, |
|
"rewards/rejected": -2.688281297683716, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.976, |
|
"grad_norm": 0.013396264787771736, |
|
"learning_rate": 1.3777777777777778e-08, |
|
"logits/chosen": 0.06943359225988388, |
|
"logits/rejected": 0.160491943359375, |
|
"logps/chosen": -273.25, |
|
"logps/rejected": -424.6000061035156, |
|
"loss": 0.001, |
|
"nll_loss": 0.955859363079071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.075000762939453, |
|
"rewards/margins": 22.850000381469727, |
|
"rewards/rejected": -2.7822265625, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.984, |
|
"grad_norm": 0.01902150518424847, |
|
"learning_rate": 9.333333333333334e-09, |
|
"logits/chosen": 0.12221679836511612, |
|
"logits/rejected": 0.22767333686351776, |
|
"logps/chosen": -282.20001220703125, |
|
"logps/rejected": -382.79998779296875, |
|
"loss": 0.0008, |
|
"nll_loss": 0.8285156488418579, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.587499618530273, |
|
"rewards/margins": 19.037500381469727, |
|
"rewards/rejected": -0.44819337129592896, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.992, |
|
"grad_norm": 0.008528899989414147, |
|
"learning_rate": 4.888888888888888e-09, |
|
"logits/chosen": 0.05415039137005806, |
|
"logits/rejected": 0.09858398139476776, |
|
"logps/chosen": -258.70001220703125, |
|
"logps/rejected": -415.20001220703125, |
|
"loss": 0.0009, |
|
"nll_loss": 0.880859375, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.700000762939453, |
|
"rewards/margins": 22.612499237060547, |
|
"rewards/rejected": -1.9093749523162842, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.012294810094826115, |
|
"learning_rate": 4.4444444444444443e-10, |
|
"logits/chosen": -0.02900390699505806, |
|
"logits/rejected": 0.1044921875, |
|
"logps/chosen": -266.1000061035156, |
|
"logps/rejected": -404.0, |
|
"loss": 0.0011, |
|
"nll_loss": 0.961718738079071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.8125, |
|
"rewards/margins": 21.975000381469727, |
|
"rewards/rejected": -1.18310546875, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -0.11485877633094788, |
|
"eval_logits/rejected": 0.04485614597797394, |
|
"eval_logps/chosen": -333.0769348144531, |
|
"eval_logps/rejected": -407.5384521484375, |
|
"eval_loss": 0.0016917419852688909, |
|
"eval_nll_loss": 0.98046875, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": 18.94230842590332, |
|
"eval_rewards/margins": 20.269229888916016, |
|
"eval_rewards/rejected": -1.33984375, |
|
"eval_runtime": 8.6236, |
|
"eval_samples_per_second": 11.596, |
|
"eval_steps_per_second": 1.507, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1250, |
|
"total_flos": 0.0, |
|
"train_loss": 0.0, |
|
"train_runtime": 0.984, |
|
"train_samples_per_second": 10161.128, |
|
"train_steps_per_second": 1270.268 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|