|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 4164, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007204610951008645, |
|
"grad_norm": 14.58157977905889, |
|
"learning_rate": 1.199040767386091e-10, |
|
"logits/chosen": -1.901450514793396, |
|
"logits/rejected": -1.9076323509216309, |
|
"logps/chosen": -0.8524526953697205, |
|
"logps/rejected": -0.9626365900039673, |
|
"loss": 1.1927, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.704905390739441, |
|
"rewards/margins": 0.22036786377429962, |
|
"rewards/rejected": -1.9252731800079346, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007204610951008645, |
|
"grad_norm": 17.709463159121455, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -2.020684242248535, |
|
"logits/rejected": -2.0064282417297363, |
|
"logps/chosen": -1.0048482418060303, |
|
"logps/rejected": -1.1098697185516357, |
|
"loss": 1.216, |
|
"rewards/accuracies": 0.5208333134651184, |
|
"rewards/chosen": -2.0096964836120605, |
|
"rewards/margins": 0.21004274487495422, |
|
"rewards/rejected": -2.2197394371032715, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01440922190201729, |
|
"grad_norm": 22.640302051500377, |
|
"learning_rate": 2.398081534772182e-09, |
|
"logits/chosen": -2.021089792251587, |
|
"logits/rejected": -2.0176689624786377, |
|
"logps/chosen": -1.0516496896743774, |
|
"logps/rejected": -1.1834802627563477, |
|
"loss": 1.1858, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.103299379348755, |
|
"rewards/margins": 0.26366108655929565, |
|
"rewards/rejected": -2.3669605255126953, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021613832853025938, |
|
"grad_norm": 17.8606028438409, |
|
"learning_rate": 3.597122302158273e-09, |
|
"logits/chosen": -1.9866092205047607, |
|
"logits/rejected": -1.9793494939804077, |
|
"logps/chosen": -1.0540482997894287, |
|
"logps/rejected": -1.1519711017608643, |
|
"loss": 1.2346, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1080965995788574, |
|
"rewards/margins": 0.19584545493125916, |
|
"rewards/rejected": -2.3039422035217285, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02881844380403458, |
|
"grad_norm": 19.245572130250604, |
|
"learning_rate": 4.796163069544364e-09, |
|
"logits/chosen": -2.0317888259887695, |
|
"logits/rejected": -2.031811475753784, |
|
"logps/chosen": -1.0351777076721191, |
|
"logps/rejected": -1.136722207069397, |
|
"loss": 1.2355, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0703554153442383, |
|
"rewards/margins": 0.20308911800384521, |
|
"rewards/rejected": -2.273444414138794, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03602305475504323, |
|
"grad_norm": 14.943806509066846, |
|
"learning_rate": 5.995203836930456e-09, |
|
"logits/chosen": -1.9625627994537354, |
|
"logits/rejected": -1.9631847143173218, |
|
"logps/chosen": -0.9414892196655273, |
|
"logps/rejected": -1.007533311843872, |
|
"loss": 1.2547, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8829784393310547, |
|
"rewards/margins": 0.13208839297294617, |
|
"rewards/rejected": -2.015066623687744, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.043227665706051875, |
|
"grad_norm": 21.528231741291215, |
|
"learning_rate": 7.194244604316546e-09, |
|
"logits/chosen": -2.033930778503418, |
|
"logits/rejected": -2.0294690132141113, |
|
"logps/chosen": -1.0896106958389282, |
|
"logps/rejected": -1.1459602117538452, |
|
"loss": 1.2679, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.1792213916778564, |
|
"rewards/margins": 0.1126992255449295, |
|
"rewards/rejected": -2.2919204235076904, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05043227665706052, |
|
"grad_norm": 20.70296936549822, |
|
"learning_rate": 8.393285371702639e-09, |
|
"logits/chosen": -2.0241129398345947, |
|
"logits/rejected": -2.0117270946502686, |
|
"logps/chosen": -1.1098978519439697, |
|
"logps/rejected": -1.204820156097412, |
|
"loss": 1.2271, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.2197957038879395, |
|
"rewards/margins": 0.1898445188999176, |
|
"rewards/rejected": -2.409640312194824, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05763688760806916, |
|
"grad_norm": 24.40623296093575, |
|
"learning_rate": 9.592326139088728e-09, |
|
"logits/chosen": -2.0398144721984863, |
|
"logits/rejected": -2.036891222000122, |
|
"logps/chosen": -1.1656566858291626, |
|
"logps/rejected": -1.237831473350525, |
|
"loss": 1.2527, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.331313371658325, |
|
"rewards/margins": 0.14434944093227386, |
|
"rewards/rejected": -2.47566294670105, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06484149855907781, |
|
"grad_norm": 15.525751311455734, |
|
"learning_rate": 1.0791366906474819e-08, |
|
"logits/chosen": -2.0057613849639893, |
|
"logits/rejected": -2.0072615146636963, |
|
"logps/chosen": -1.0418776273727417, |
|
"logps/rejected": -1.1488852500915527, |
|
"loss": 1.215, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.0837552547454834, |
|
"rewards/margins": 0.21401505172252655, |
|
"rewards/rejected": -2.2977705001831055, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07204610951008646, |
|
"grad_norm": 19.01739570575657, |
|
"learning_rate": 1.1990407673860912e-08, |
|
"logits/chosen": -2.0440549850463867, |
|
"logits/rejected": -2.038007974624634, |
|
"logps/chosen": -1.0073726177215576, |
|
"logps/rejected": -1.114424467086792, |
|
"loss": 1.2172, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0147452354431152, |
|
"rewards/margins": 0.21410349011421204, |
|
"rewards/rejected": -2.228848934173584, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0792507204610951, |
|
"grad_norm": 16.468864603689383, |
|
"learning_rate": 1.3189448441247003e-08, |
|
"logits/chosen": -1.986783742904663, |
|
"logits/rejected": -1.975547194480896, |
|
"logps/chosen": -1.0294089317321777, |
|
"logps/rejected": -1.1291263103485107, |
|
"loss": 1.2279, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0588178634643555, |
|
"rewards/margins": 0.19943459331989288, |
|
"rewards/rejected": -2.2582526206970215, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08645533141210375, |
|
"grad_norm": 18.27069220463476, |
|
"learning_rate": 1.4388489208633092e-08, |
|
"logits/chosen": -1.9731948375701904, |
|
"logits/rejected": -1.9713401794433594, |
|
"logps/chosen": -0.9640307426452637, |
|
"logps/rejected": -1.0653537511825562, |
|
"loss": 1.2087, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9280614852905273, |
|
"rewards/margins": 0.2026461362838745, |
|
"rewards/rejected": -2.1307075023651123, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0936599423631124, |
|
"grad_norm": 17.232187953156046, |
|
"learning_rate": 1.5587529976019183e-08, |
|
"logits/chosen": -2.066575527191162, |
|
"logits/rejected": -2.065995931625366, |
|
"logps/chosen": -1.0801920890808105, |
|
"logps/rejected": -1.1521753072738647, |
|
"loss": 1.2549, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.160384178161621, |
|
"rewards/margins": 0.14396657049655914, |
|
"rewards/rejected": -2.3043506145477295, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10086455331412104, |
|
"grad_norm": 20.847348575081657, |
|
"learning_rate": 1.6786570743405277e-08, |
|
"logits/chosen": -1.9832985401153564, |
|
"logits/rejected": -1.9769630432128906, |
|
"logps/chosen": -0.9781940579414368, |
|
"logps/rejected": -1.122657060623169, |
|
"loss": 1.1694, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9563881158828735, |
|
"rewards/margins": 0.2889261543750763, |
|
"rewards/rejected": -2.245314121246338, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.10806916426512968, |
|
"grad_norm": 19.95238793204191, |
|
"learning_rate": 1.7985611510791365e-08, |
|
"logits/chosen": -1.9963840246200562, |
|
"logits/rejected": -1.9920928478240967, |
|
"logps/chosen": -1.0187867879867554, |
|
"logps/rejected": -1.136918306350708, |
|
"loss": 1.2067, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0375735759735107, |
|
"rewards/margins": 0.23626303672790527, |
|
"rewards/rejected": -2.273836612701416, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11527377521613832, |
|
"grad_norm": 17.4507491502089, |
|
"learning_rate": 1.9184652278177456e-08, |
|
"logits/chosen": -2.00455904006958, |
|
"logits/rejected": -1.9985454082489014, |
|
"logps/chosen": -0.9479260444641113, |
|
"logps/rejected": -1.0970423221588135, |
|
"loss": 1.1509, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8958520889282227, |
|
"rewards/margins": 0.2982328534126282, |
|
"rewards/rejected": -2.194084644317627, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.12247838616714697, |
|
"grad_norm": 22.64495005377011, |
|
"learning_rate": 2.038369304556355e-08, |
|
"logits/chosen": -2.0030527114868164, |
|
"logits/rejected": -1.995448350906372, |
|
"logps/chosen": -1.0368740558624268, |
|
"logps/rejected": -1.1604634523391724, |
|
"loss": 1.2057, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0737481117248535, |
|
"rewards/margins": 0.24717874825000763, |
|
"rewards/rejected": -2.3209269046783447, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12968299711815562, |
|
"grad_norm": 23.590437364971006, |
|
"learning_rate": 2.1582733812949638e-08, |
|
"logits/chosen": -2.0346579551696777, |
|
"logits/rejected": -2.027749538421631, |
|
"logps/chosen": -1.020750641822815, |
|
"logps/rejected": -1.1084620952606201, |
|
"loss": 1.2476, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.04150128364563, |
|
"rewards/margins": 0.17542308568954468, |
|
"rewards/rejected": -2.2169241905212402, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13688760806916425, |
|
"grad_norm": 22.966874261128403, |
|
"learning_rate": 2.278177458033573e-08, |
|
"logits/chosen": -2.073704719543457, |
|
"logits/rejected": -2.0714824199676514, |
|
"logps/chosen": -0.9697921872138977, |
|
"logps/rejected": -1.065453290939331, |
|
"loss": 1.212, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9395843744277954, |
|
"rewards/margins": 0.19132229685783386, |
|
"rewards/rejected": -2.130906581878662, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1440922190201729, |
|
"grad_norm": 22.638490791764895, |
|
"learning_rate": 2.3980815347721823e-08, |
|
"logits/chosen": -2.0427424907684326, |
|
"logits/rejected": -2.0397419929504395, |
|
"logps/chosen": -1.0259110927581787, |
|
"logps/rejected": -1.1529022455215454, |
|
"loss": 1.1871, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0518221855163574, |
|
"rewards/margins": 0.2539823353290558, |
|
"rewards/rejected": -2.305804491043091, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15129682997118155, |
|
"grad_norm": 21.113736148839788, |
|
"learning_rate": 2.517985611510791e-08, |
|
"logits/chosen": -2.0403473377227783, |
|
"logits/rejected": -2.037600040435791, |
|
"logps/chosen": -1.0739350318908691, |
|
"logps/rejected": -1.150781273841858, |
|
"loss": 1.2504, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1478700637817383, |
|
"rewards/margins": 0.15369237959384918, |
|
"rewards/rejected": -2.301562547683716, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1585014409221902, |
|
"grad_norm": 15.482070000655302, |
|
"learning_rate": 2.6378896882494006e-08, |
|
"logits/chosen": -1.9863160848617554, |
|
"logits/rejected": -1.982267141342163, |
|
"logps/chosen": -1.0080206394195557, |
|
"logps/rejected": -1.176837682723999, |
|
"loss": 1.1505, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0160412788391113, |
|
"rewards/margins": 0.3376340866088867, |
|
"rewards/rejected": -2.353675365447998, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.16570605187319884, |
|
"grad_norm": 17.014637756082593, |
|
"learning_rate": 2.7577937649880097e-08, |
|
"logits/chosen": -2.021378993988037, |
|
"logits/rejected": -2.021695613861084, |
|
"logps/chosen": -1.0124410390853882, |
|
"logps/rejected": -1.12635338306427, |
|
"loss": 1.2019, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0248820781707764, |
|
"rewards/margins": 0.22782447934150696, |
|
"rewards/rejected": -2.25270676612854, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1729106628242075, |
|
"grad_norm": 22.32772580016105, |
|
"learning_rate": 2.8776978417266184e-08, |
|
"logits/chosen": -2.0529181957244873, |
|
"logits/rejected": -2.0477967262268066, |
|
"logps/chosen": -1.0616161823272705, |
|
"logps/rejected": -1.1394503116607666, |
|
"loss": 1.2614, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.123232364654541, |
|
"rewards/margins": 0.15566802024841309, |
|
"rewards/rejected": -2.278900623321533, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.18011527377521613, |
|
"grad_norm": 19.079728631088813, |
|
"learning_rate": 2.997601918465228e-08, |
|
"logits/chosen": -1.9696033000946045, |
|
"logits/rejected": -1.9657630920410156, |
|
"logps/chosen": -1.0835182666778564, |
|
"logps/rejected": -1.1734166145324707, |
|
"loss": 1.2393, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.167036533355713, |
|
"rewards/margins": 0.17979690432548523, |
|
"rewards/rejected": -2.3468332290649414, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1873198847262248, |
|
"grad_norm": 21.30398020890557, |
|
"learning_rate": 3.1175059952038366e-08, |
|
"logits/chosen": -1.9843509197235107, |
|
"logits/rejected": -1.9924278259277344, |
|
"logps/chosen": -1.1062877178192139, |
|
"logps/rejected": -1.2165796756744385, |
|
"loss": 1.2142, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.2125754356384277, |
|
"rewards/margins": 0.22058391571044922, |
|
"rewards/rejected": -2.433159351348877, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.19452449567723343, |
|
"grad_norm": 20.993622960377618, |
|
"learning_rate": 3.237410071942446e-08, |
|
"logits/chosen": -2.0651376247406006, |
|
"logits/rejected": -2.0571722984313965, |
|
"logps/chosen": -1.0719540119171143, |
|
"logps/rejected": -1.2004284858703613, |
|
"loss": 1.181, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.1439080238342285, |
|
"rewards/margins": 0.2569490075111389, |
|
"rewards/rejected": -2.4008569717407227, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2017291066282421, |
|
"grad_norm": 25.067055659781758, |
|
"learning_rate": 3.3573141486810555e-08, |
|
"logits/chosen": -2.014195680618286, |
|
"logits/rejected": -2.012540102005005, |
|
"logps/chosen": -0.935396671295166, |
|
"logps/rejected": -1.049852967262268, |
|
"loss": 1.1977, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.870793342590332, |
|
"rewards/margins": 0.2289123237133026, |
|
"rewards/rejected": -2.099705934524536, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.20893371757925072, |
|
"grad_norm": 21.777264205916122, |
|
"learning_rate": 3.477218225419664e-08, |
|
"logits/chosen": -2.044172763824463, |
|
"logits/rejected": -2.0461270809173584, |
|
"logps/chosen": -1.0135643482208252, |
|
"logps/rejected": -1.1082309484481812, |
|
"loss": 1.2343, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0271286964416504, |
|
"rewards/margins": 0.18933361768722534, |
|
"rewards/rejected": -2.2164618968963623, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.21613832853025935, |
|
"grad_norm": 20.318543545834533, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -2.0240025520324707, |
|
"logits/rejected": -2.0156774520874023, |
|
"logps/chosen": -1.0902036428451538, |
|
"logps/rejected": -1.1914021968841553, |
|
"loss": 1.2135, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1804072856903076, |
|
"rewards/margins": 0.20239713788032532, |
|
"rewards/rejected": -2.3828043937683105, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22334293948126802, |
|
"grad_norm": 18.50470861360763, |
|
"learning_rate": 3.717026378896883e-08, |
|
"logits/chosen": -1.9557920694351196, |
|
"logits/rejected": -1.955775260925293, |
|
"logps/chosen": -1.0874634981155396, |
|
"logps/rejected": -1.1727240085601807, |
|
"loss": 1.2381, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.174926996231079, |
|
"rewards/margins": 0.17052076756954193, |
|
"rewards/rejected": -2.3454480171203613, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.23054755043227665, |
|
"grad_norm": 15.935054480540096, |
|
"learning_rate": 3.836930455635491e-08, |
|
"logits/chosen": -2.031646966934204, |
|
"logits/rejected": -2.0232386589050293, |
|
"logps/chosen": -1.0084177255630493, |
|
"logps/rejected": -1.1408658027648926, |
|
"loss": 1.1926, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0168354511260986, |
|
"rewards/margins": 0.2648962140083313, |
|
"rewards/rejected": -2.281731605529785, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.2377521613832853, |
|
"grad_norm": 15.808626134367197, |
|
"learning_rate": 3.9568345323741003e-08, |
|
"logits/chosen": -2.019885778427124, |
|
"logits/rejected": -2.022150754928589, |
|
"logps/chosen": -1.0463831424713135, |
|
"logps/rejected": -1.069990873336792, |
|
"loss": 1.3364, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -2.092766284942627, |
|
"rewards/margins": 0.04721563309431076, |
|
"rewards/rejected": -2.139981746673584, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.24495677233429394, |
|
"grad_norm": 18.32115617252851, |
|
"learning_rate": 4.07673860911271e-08, |
|
"logits/chosen": -2.0614123344421387, |
|
"logits/rejected": -2.055767297744751, |
|
"logps/chosen": -1.0877503156661987, |
|
"logps/rejected": -1.16796875, |
|
"loss": 1.2366, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1755006313323975, |
|
"rewards/margins": 0.16043710708618164, |
|
"rewards/rejected": -2.3359375, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2521613832853026, |
|
"grad_norm": 19.369790564686102, |
|
"learning_rate": 4.1966426858513185e-08, |
|
"logits/chosen": -1.9940099716186523, |
|
"logits/rejected": -1.9883639812469482, |
|
"logps/chosen": -0.9887149930000305, |
|
"logps/rejected": -1.1154861450195312, |
|
"loss": 1.1858, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.977429986000061, |
|
"rewards/margins": 0.25354230403900146, |
|
"rewards/rejected": -2.2309722900390625, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.25936599423631124, |
|
"grad_norm": 21.686526135721945, |
|
"learning_rate": 4.3165467625899276e-08, |
|
"logits/chosen": -1.9959064722061157, |
|
"logits/rejected": -1.9917312860488892, |
|
"logps/chosen": -1.0866310596466064, |
|
"logps/rejected": -1.2025970220565796, |
|
"loss": 1.1977, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.173262119293213, |
|
"rewards/margins": 0.23193176090717316, |
|
"rewards/rejected": -2.405194044113159, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2665706051873199, |
|
"grad_norm": 18.21919999535183, |
|
"learning_rate": 4.4364508393285374e-08, |
|
"logits/chosen": -2.0002856254577637, |
|
"logits/rejected": -2.000253200531006, |
|
"logps/chosen": -1.0520254373550415, |
|
"logps/rejected": -1.180267572402954, |
|
"loss": 1.1778, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.104050874710083, |
|
"rewards/margins": 0.2564844489097595, |
|
"rewards/rejected": -2.360535144805908, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2737752161383285, |
|
"grad_norm": 16.536106044001812, |
|
"learning_rate": 4.556354916067146e-08, |
|
"logits/chosen": -2.028313398361206, |
|
"logits/rejected": -2.032285213470459, |
|
"logps/chosen": -1.0125257968902588, |
|
"logps/rejected": -1.0858430862426758, |
|
"loss": 1.2682, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0250515937805176, |
|
"rewards/margins": 0.14663462340831757, |
|
"rewards/rejected": -2.1716861724853516, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.28097982708933716, |
|
"grad_norm": 15.31773608533987, |
|
"learning_rate": 4.676258992805755e-08, |
|
"logits/chosen": -2.0320096015930176, |
|
"logits/rejected": -2.0257675647735596, |
|
"logps/chosen": -1.0224783420562744, |
|
"logps/rejected": -1.1486625671386719, |
|
"loss": 1.1819, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.044956684112549, |
|
"rewards/margins": 0.25236865878105164, |
|
"rewards/rejected": -2.2973251342773438, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2881844380403458, |
|
"grad_norm": 18.995537958721503, |
|
"learning_rate": 4.796163069544365e-08, |
|
"logits/chosen": -2.034123420715332, |
|
"logits/rejected": -2.034450054168701, |
|
"logps/chosen": -0.9964189529418945, |
|
"logps/rejected": -1.0486726760864258, |
|
"loss": 1.2726, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.992837905883789, |
|
"rewards/margins": 0.10450725257396698, |
|
"rewards/rejected": -2.0973453521728516, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2953890489913545, |
|
"grad_norm": 18.624392586338367, |
|
"learning_rate": 4.916067146282973e-08, |
|
"logits/chosen": -2.0277891159057617, |
|
"logits/rejected": -2.0259571075439453, |
|
"logps/chosen": -1.0748345851898193, |
|
"logps/rejected": -1.1457411050796509, |
|
"loss": 1.262, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.1496691703796387, |
|
"rewards/margins": 0.14181289076805115, |
|
"rewards/rejected": -2.2914822101593018, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3025936599423631, |
|
"grad_norm": 16.76581954495512, |
|
"learning_rate": 4.999992091672379e-08, |
|
"logits/chosen": -2.011078119277954, |
|
"logits/rejected": -2.0153493881225586, |
|
"logps/chosen": -1.0450259447097778, |
|
"logps/rejected": -1.1236448287963867, |
|
"loss": 1.2425, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.0900518894195557, |
|
"rewards/margins": 0.15723773837089539, |
|
"rewards/rejected": -2.2472896575927734, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.30979827089337175, |
|
"grad_norm": 17.72733209255425, |
|
"learning_rate": 4.999851500573209e-08, |
|
"logits/chosen": -1.9903459548950195, |
|
"logits/rejected": -1.991233229637146, |
|
"logps/chosen": -1.0592777729034424, |
|
"logps/rejected": -1.0997775793075562, |
|
"loss": 1.3022, |
|
"rewards/accuracies": 0.45625001192092896, |
|
"rewards/chosen": -2.1185555458068848, |
|
"rewards/margins": 0.08099973201751709, |
|
"rewards/rejected": -2.1995551586151123, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3170028818443804, |
|
"grad_norm": 15.96665018689344, |
|
"learning_rate": 4.999535180235972e-08, |
|
"logits/chosen": -1.990563988685608, |
|
"logits/rejected": -1.9907207489013672, |
|
"logps/chosen": -1.0212013721466064, |
|
"logps/rejected": -1.1435030698776245, |
|
"loss": 1.1959, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.042402744293213, |
|
"rewards/margins": 0.2446034699678421, |
|
"rewards/rejected": -2.287006139755249, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.3242074927953891, |
|
"grad_norm": 17.84897470512453, |
|
"learning_rate": 4.9990431528966836e-08, |
|
"logits/chosen": -2.010443925857544, |
|
"logits/rejected": -2.006673574447632, |
|
"logps/chosen": -1.1450097560882568, |
|
"logps/rejected": -1.1849489212036133, |
|
"loss": 1.3018, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.2900195121765137, |
|
"rewards/margins": 0.07987822592258453, |
|
"rewards/rejected": -2.3698978424072266, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3314121037463977, |
|
"grad_norm": 24.49190807066052, |
|
"learning_rate": 4.9983754531428326e-08, |
|
"logits/chosen": -2.006472110748291, |
|
"logits/rejected": -2.00079083442688, |
|
"logps/chosen": -1.1708580255508423, |
|
"logps/rejected": -1.2872368097305298, |
|
"loss": 1.2012, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.3417160511016846, |
|
"rewards/margins": 0.23275737464427948, |
|
"rewards/rejected": -2.5744736194610596, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.33861671469740634, |
|
"grad_norm": 23.024434569130843, |
|
"learning_rate": 4.997532127910954e-08, |
|
"logits/chosen": -2.0429301261901855, |
|
"logits/rejected": -2.0308475494384766, |
|
"logps/chosen": -1.100434422492981, |
|
"logps/rejected": -1.2019624710083008, |
|
"loss": 1.2198, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.200868844985962, |
|
"rewards/margins": 0.20305626094341278, |
|
"rewards/rejected": -2.4039249420166016, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.345821325648415, |
|
"grad_norm": 21.129827787614413, |
|
"learning_rate": 4.996513236483331e-08, |
|
"logits/chosen": -2.101729154586792, |
|
"logits/rejected": -2.091571569442749, |
|
"logps/chosen": -0.9851749539375305, |
|
"logps/rejected": -1.106676459312439, |
|
"loss": 1.185, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.970349907875061, |
|
"rewards/margins": 0.2430029660463333, |
|
"rewards/rejected": -2.213352918624878, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3530259365994236, |
|
"grad_norm": 18.94655048736081, |
|
"learning_rate": 4.9953188504838225e-08, |
|
"logits/chosen": -2.0206782817840576, |
|
"logits/rejected": -2.0197720527648926, |
|
"logps/chosen": -0.9880903959274292, |
|
"logps/rejected": -1.1017425060272217, |
|
"loss": 1.1937, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9761807918548584, |
|
"rewards/margins": 0.2273043841123581, |
|
"rewards/rejected": -2.2034850120544434, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.36023054755043227, |
|
"grad_norm": 18.60846892662722, |
|
"learning_rate": 4.993949053872834e-08, |
|
"logits/chosen": -2.019057035446167, |
|
"logits/rejected": -2.0055313110351562, |
|
"logps/chosen": -1.0131161212921143, |
|
"logps/rejected": -1.139453649520874, |
|
"loss": 1.1821, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0262322425842285, |
|
"rewards/margins": 0.2526749074459076, |
|
"rewards/rejected": -2.278907299041748, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36743515850144093, |
|
"grad_norm": 19.18531858517567, |
|
"learning_rate": 4.9924039429414086e-08, |
|
"logits/chosen": -2.0883572101593018, |
|
"logits/rejected": -2.0818283557891846, |
|
"logps/chosen": -1.0440417528152466, |
|
"logps/rejected": -1.1581791639328003, |
|
"loss": 1.2079, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.088083505630493, |
|
"rewards/margins": 0.22827525436878204, |
|
"rewards/rejected": -2.3163583278656006, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3746397694524496, |
|
"grad_norm": 16.068632795684866, |
|
"learning_rate": 4.990683626304467e-08, |
|
"logits/chosen": -2.010894775390625, |
|
"logits/rejected": -2.0092484951019287, |
|
"logps/chosen": -1.1070988178253174, |
|
"logps/rejected": -1.2031704187393188, |
|
"loss": 1.2198, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.2141976356506348, |
|
"rewards/margins": 0.19214320182800293, |
|
"rewards/rejected": -2.4063408374786377, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3818443804034582, |
|
"grad_norm": 17.727178124609676, |
|
"learning_rate": 4.9887882248931646e-08, |
|
"logits/chosen": -1.97884202003479, |
|
"logits/rejected": -1.968973159790039, |
|
"logps/chosen": -0.9846093058586121, |
|
"logps/rejected": -1.0614283084869385, |
|
"loss": 1.2503, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9692186117172241, |
|
"rewards/margins": 0.15363821387290955, |
|
"rewards/rejected": -2.122856616973877, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.38904899135446686, |
|
"grad_norm": 22.67722196494781, |
|
"learning_rate": 4.986717871946393e-08, |
|
"logits/chosen": -2.004068374633789, |
|
"logits/rejected": -1.99717116355896, |
|
"logps/chosen": -1.0308892726898193, |
|
"logps/rejected": -1.1323744058609009, |
|
"loss": 1.2209, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0617785453796387, |
|
"rewards/margins": 0.20297034084796906, |
|
"rewards/rejected": -2.2647488117218018, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3962536023054755, |
|
"grad_norm": 17.281352424891857, |
|
"learning_rate": 4.984472713001416e-08, |
|
"logits/chosen": -1.9620494842529297, |
|
"logits/rejected": -1.962517499923706, |
|
"logps/chosen": -1.0005210638046265, |
|
"logps/rejected": -1.0776532888412476, |
|
"loss": 1.2683, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.001042127609253, |
|
"rewards/margins": 0.1542646884918213, |
|
"rewards/rejected": -2.155306577682495, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4034582132564842, |
|
"grad_norm": 17.117159642375974, |
|
"learning_rate": 4.982052905883637e-08, |
|
"logits/chosen": -2.031991481781006, |
|
"logits/rejected": -2.0326719284057617, |
|
"logps/chosen": -1.080214262008667, |
|
"logps/rejected": -1.181120753288269, |
|
"loss": 1.224, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.160428524017334, |
|
"rewards/margins": 0.2018129527568817, |
|
"rewards/rejected": -2.362241506576538, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4106628242074928, |
|
"grad_norm": 16.328895540705197, |
|
"learning_rate": 4.979458620695505e-08, |
|
"logits/chosen": -2.029468059539795, |
|
"logits/rejected": -2.0152204036712646, |
|
"logps/chosen": -1.0948221683502197, |
|
"logps/rejected": -1.208194613456726, |
|
"loss": 1.2094, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1896443367004395, |
|
"rewards/margins": 0.22674505412578583, |
|
"rewards/rejected": -2.416389226913452, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.41786743515850144, |
|
"grad_norm": 19.61140460251683, |
|
"learning_rate": 4.976690039804555e-08, |
|
"logits/chosen": -2.033027172088623, |
|
"logits/rejected": -2.0314948558807373, |
|
"logps/chosen": -0.9877282381057739, |
|
"logps/rejected": -1.0673277378082275, |
|
"loss": 1.2473, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.9754564762115479, |
|
"rewards/margins": 0.1591992825269699, |
|
"rewards/rejected": -2.134655475616455, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4250720461095101, |
|
"grad_norm": 21.430631009789273, |
|
"learning_rate": 4.973747357830592e-08, |
|
"logits/chosen": -2.0215108394622803, |
|
"logits/rejected": -2.021780490875244, |
|
"logps/chosen": -1.0275431871414185, |
|
"logps/rejected": -1.1647249460220337, |
|
"loss": 1.1677, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.055086374282837, |
|
"rewards/margins": 0.2743634283542633, |
|
"rewards/rejected": -2.3294498920440674, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.4322766570605187, |
|
"grad_norm": 19.463998303694815, |
|
"learning_rate": 4.970630781632009e-08, |
|
"logits/chosen": -2.0801994800567627, |
|
"logits/rejected": -2.076254367828369, |
|
"logps/chosen": -1.0327340364456177, |
|
"logps/rejected": -1.1751863956451416, |
|
"loss": 1.1681, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0654680728912354, |
|
"rewards/margins": 0.28490471839904785, |
|
"rewards/rejected": -2.350372791290283, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.43948126801152737, |
|
"grad_norm": 21.00995063503415, |
|
"learning_rate": 4.967340530291242e-08, |
|
"logits/chosen": -2.027909517288208, |
|
"logits/rejected": -2.0180211067199707, |
|
"logps/chosen": -1.0928115844726562, |
|
"logps/rejected": -1.1507136821746826, |
|
"loss": 1.2682, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.1856231689453125, |
|
"rewards/margins": 0.11580429971218109, |
|
"rewards/rejected": -2.3014273643493652, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.44668587896253603, |
|
"grad_norm": 24.905225792062406, |
|
"learning_rate": 4.9638768350993755e-08, |
|
"logits/chosen": -2.0285048484802246, |
|
"logits/rejected": -2.021249771118164, |
|
"logps/chosen": -0.9952943921089172, |
|
"logps/rejected": -1.0829205513000488, |
|
"loss": 1.2345, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.9905887842178345, |
|
"rewards/margins": 0.17525213956832886, |
|
"rewards/rejected": -2.1658411026000977, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4538904899135447, |
|
"grad_norm": 20.771750563160076, |
|
"learning_rate": 4.9602399395398786e-08, |
|
"logits/chosen": -2.0377490520477295, |
|
"logits/rejected": -2.037675380706787, |
|
"logps/chosen": -1.027521014213562, |
|
"logps/rejected": -1.1547839641571045, |
|
"loss": 1.183, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.055042028427124, |
|
"rewards/margins": 0.2545255422592163, |
|
"rewards/rejected": -2.309567928314209, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.4610951008645533, |
|
"grad_norm": 16.17835710154515, |
|
"learning_rate": 4.9564300992714914e-08, |
|
"logits/chosen": -1.9597883224487305, |
|
"logits/rejected": -1.9607963562011719, |
|
"logps/chosen": -1.0108855962753296, |
|
"logps/rejected": -1.116549015045166, |
|
"loss": 1.2101, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.021771192550659, |
|
"rewards/margins": 0.21132683753967285, |
|
"rewards/rejected": -2.233098030090332, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.46829971181556196, |
|
"grad_norm": 21.86769715087536, |
|
"learning_rate": 4.952447582110253e-08, |
|
"logits/chosen": -2.0587735176086426, |
|
"logits/rejected": -2.044377565383911, |
|
"logps/chosen": -1.0383652448654175, |
|
"logps/rejected": -1.1178988218307495, |
|
"loss": 1.2479, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.076730489730835, |
|
"rewards/margins": 0.15906734764575958, |
|
"rewards/rejected": -2.235797643661499, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4755043227665706, |
|
"grad_norm": 23.755054747254476, |
|
"learning_rate": 4.948292668010676e-08, |
|
"logits/chosen": -2.031721353530884, |
|
"logits/rejected": -2.032727003097534, |
|
"logps/chosen": -1.0880773067474365, |
|
"logps/rejected": -1.1748898029327393, |
|
"loss": 1.2449, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.176154613494873, |
|
"rewards/margins": 0.17362497746944427, |
|
"rewards/rejected": -2.3497796058654785, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.4827089337175792, |
|
"grad_norm": 20.474460354625247, |
|
"learning_rate": 4.943965649046064e-08, |
|
"logits/chosen": -2.0048508644104004, |
|
"logits/rejected": -1.9955081939697266, |
|
"logps/chosen": -1.062713384628296, |
|
"logps/rejected": -1.1663198471069336, |
|
"loss": 1.2154, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.125426769256592, |
|
"rewards/margins": 0.2072126865386963, |
|
"rewards/rejected": -2.332639694213867, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.4899135446685879, |
|
"grad_norm": 19.048186528049722, |
|
"learning_rate": 4.9394668293879835e-08, |
|
"logits/chosen": -1.959315538406372, |
|
"logits/rejected": -1.9503145217895508, |
|
"logps/chosen": -1.0368311405181885, |
|
"logps/rejected": -1.1063206195831299, |
|
"loss": 1.2624, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.073662281036377, |
|
"rewards/margins": 0.13897888362407684, |
|
"rewards/rejected": -2.2126412391662598, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.49711815561959655, |
|
"grad_norm": 24.933354819026505, |
|
"learning_rate": 4.93479652528488e-08, |
|
"logits/chosen": -2.020735502243042, |
|
"logits/rejected": -2.0154590606689453, |
|
"logps/chosen": -1.1052331924438477, |
|
"logps/rejected": -1.209161639213562, |
|
"loss": 1.2262, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.2104663848876953, |
|
"rewards/margins": 0.2078566551208496, |
|
"rewards/rejected": -2.418323278427124, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5043227665706052, |
|
"grad_norm": 20.317629206968732, |
|
"learning_rate": 4.929955065039848e-08, |
|
"logits/chosen": -2.0213494300842285, |
|
"logits/rejected": -2.0158300399780273, |
|
"logps/chosen": -1.0192697048187256, |
|
"logps/rejected": -1.1514381170272827, |
|
"loss": 1.1829, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.038539409637451, |
|
"rewards/margins": 0.2643369436264038, |
|
"rewards/rejected": -2.3028762340545654, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5115273775216138, |
|
"grad_norm": 19.004922715885144, |
|
"learning_rate": 4.92494278898755e-08, |
|
"logits/chosen": -1.985918402671814, |
|
"logits/rejected": -1.982656717300415, |
|
"logps/chosen": -0.8973722457885742, |
|
"logps/rejected": -1.0216716527938843, |
|
"loss": 1.1973, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.7947444915771484, |
|
"rewards/margins": 0.24859857559204102, |
|
"rewards/rejected": -2.0433433055877686, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5187319884726225, |
|
"grad_norm": 18.960064654240945, |
|
"learning_rate": 4.9197600494702955e-08, |
|
"logits/chosen": -2.007420539855957, |
|
"logits/rejected": -2.001126289367676, |
|
"logps/chosen": -1.0426667928695679, |
|
"logps/rejected": -1.1658456325531006, |
|
"loss": 1.1852, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0853335857391357, |
|
"rewards/margins": 0.2463577687740326, |
|
"rewards/rejected": -2.331691265106201, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5259365994236311, |
|
"grad_norm": 20.53343043509484, |
|
"learning_rate": 4.9144072108132725e-08, |
|
"logits/chosen": -2.0134854316711426, |
|
"logits/rejected": -2.0023691654205322, |
|
"logps/chosen": -1.0226707458496094, |
|
"logps/rejected": -1.1051828861236572, |
|
"loss": 1.2518, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0453414916992188, |
|
"rewards/margins": 0.16502413153648376, |
|
"rewards/rejected": -2.2103657722473145, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5331412103746398, |
|
"grad_norm": 17.758862211588106, |
|
"learning_rate": 4.908884649298937e-08, |
|
"logits/chosen": -1.9972114562988281, |
|
"logits/rejected": -2.004119634628296, |
|
"logps/chosen": -1.0192463397979736, |
|
"logps/rejected": -1.0796899795532227, |
|
"loss": 1.2835, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -2.0384926795959473, |
|
"rewards/margins": 0.12088724225759506, |
|
"rewards/rejected": -2.1593799591064453, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5403458213256485, |
|
"grad_norm": 23.124810759913256, |
|
"learning_rate": 4.903192753140557e-08, |
|
"logits/chosen": -2.0147690773010254, |
|
"logits/rejected": -2.009342908859253, |
|
"logps/chosen": -1.1004369258880615, |
|
"logps/rejected": -1.1906808614730835, |
|
"loss": 1.2378, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.200873851776123, |
|
"rewards/margins": 0.18048794567584991, |
|
"rewards/rejected": -2.381361722946167, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.547550432276657, |
|
"grad_norm": 19.72534726379729, |
|
"learning_rate": 4.897331922454931e-08, |
|
"logits/chosen": -1.9795690774917603, |
|
"logits/rejected": -1.9833734035491943, |
|
"logps/chosen": -1.0041850805282593, |
|
"logps/rejected": -1.1136337518692017, |
|
"loss": 1.2165, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0083701610565186, |
|
"rewards/margins": 0.2188970297574997, |
|
"rewards/rejected": -2.2272675037384033, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.5547550432276657, |
|
"grad_norm": 20.539097658978797, |
|
"learning_rate": 4.891302569234256e-08, |
|
"logits/chosen": -1.9727134704589844, |
|
"logits/rejected": -1.9754774570465088, |
|
"logps/chosen": -0.9772794842720032, |
|
"logps/rejected": -1.1290626525878906, |
|
"loss": 1.1643, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9545589685440063, |
|
"rewards/margins": 0.3035663962364197, |
|
"rewards/rejected": -2.2581253051757812, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5619596541786743, |
|
"grad_norm": 22.07597844396349, |
|
"learning_rate": 4.8851051173171656e-08, |
|
"logits/chosen": -1.9940038919448853, |
|
"logits/rejected": -1.9926246404647827, |
|
"logps/chosen": -1.0405977964401245, |
|
"logps/rejected": -1.1220670938491821, |
|
"loss": 1.2393, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.081195592880249, |
|
"rewards/margins": 0.1629386693239212, |
|
"rewards/rejected": -2.2441341876983643, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.569164265129683, |
|
"grad_norm": 17.470111374688827, |
|
"learning_rate": 4.87874000235894e-08, |
|
"logits/chosen": -2.013667106628418, |
|
"logits/rejected": -2.0078587532043457, |
|
"logps/chosen": -1.0763031244277954, |
|
"logps/rejected": -1.233242154121399, |
|
"loss": 1.1596, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.152606248855591, |
|
"rewards/margins": 0.3138778507709503, |
|
"rewards/rejected": -2.466484308242798, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.5763688760806917, |
|
"grad_norm": 19.520543671943127, |
|
"learning_rate": 4.872207671800876e-08, |
|
"logits/chosen": -2.0354135036468506, |
|
"logits/rejected": -2.0318105220794678, |
|
"logps/chosen": -1.0444949865341187, |
|
"logps/rejected": -1.1220977306365967, |
|
"loss": 1.2567, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0889899730682373, |
|
"rewards/margins": 0.15520496666431427, |
|
"rewards/rejected": -2.2441954612731934, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5835734870317003, |
|
"grad_norm": 15.931565272235597, |
|
"learning_rate": 4.865508584838841e-08, |
|
"logits/chosen": -2.0230934619903564, |
|
"logits/rejected": -2.025510311126709, |
|
"logps/chosen": -1.0136370658874512, |
|
"logps/rejected": -1.1028186082839966, |
|
"loss": 1.2343, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.0272741317749023, |
|
"rewards/margins": 0.17836324870586395, |
|
"rewards/rejected": -2.205637216567993, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.590778097982709, |
|
"grad_norm": 21.101696006896514, |
|
"learning_rate": 4.858643212390985e-08, |
|
"logits/chosen": -2.0232460498809814, |
|
"logits/rejected": -2.0134730339050293, |
|
"logps/chosen": -1.0298725366592407, |
|
"logps/rejected": -1.1151840686798096, |
|
"loss": 1.2504, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0597450733184814, |
|
"rewards/margins": 0.1706230342388153, |
|
"rewards/rejected": -2.230368137359619, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.5979827089337176, |
|
"grad_norm": 18.384683685983724, |
|
"learning_rate": 4.851612037064643e-08, |
|
"logits/chosen": -2.0008656978607178, |
|
"logits/rejected": -1.9988391399383545, |
|
"logps/chosen": -0.96119225025177, |
|
"logps/rejected": -1.0799301862716675, |
|
"loss": 1.2051, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.92238450050354, |
|
"rewards/margins": 0.2374759167432785, |
|
"rewards/rejected": -2.159860372543335, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6051873198847262, |
|
"grad_norm": 15.976129382373403, |
|
"learning_rate": 4.8444155531224065e-08, |
|
"logits/chosen": -2.0319008827209473, |
|
"logits/rejected": -2.031928539276123, |
|
"logps/chosen": -1.0886750221252441, |
|
"logps/rejected": -1.1605113744735718, |
|
"loss": 1.2625, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1773500442504883, |
|
"rewards/margins": 0.143672913312912, |
|
"rewards/rejected": -2.3210227489471436, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6123919308357348, |
|
"grad_norm": 15.372106337343025, |
|
"learning_rate": 4.8370542664473805e-08, |
|
"logits/chosen": -2.03184175491333, |
|
"logits/rejected": -2.0259571075439453, |
|
"logps/chosen": -1.0505023002624512, |
|
"logps/rejected": -1.15494704246521, |
|
"loss": 1.2248, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1010046005249023, |
|
"rewards/margins": 0.20888929069042206, |
|
"rewards/rejected": -2.30989408493042, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6195965417867435, |
|
"grad_norm": 17.833021138756298, |
|
"learning_rate": 4.829528694507624e-08, |
|
"logits/chosen": -2.011185646057129, |
|
"logits/rejected": -2.0070912837982178, |
|
"logps/chosen": -1.161972999572754, |
|
"logps/rejected": -1.218332290649414, |
|
"loss": 1.28, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.323945999145508, |
|
"rewards/margins": 0.11271880567073822, |
|
"rewards/rejected": -2.436664581298828, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6268011527377522, |
|
"grad_norm": 20.10043591744987, |
|
"learning_rate": 4.821839366319768e-08, |
|
"logits/chosen": -2.0453944206237793, |
|
"logits/rejected": -2.0392508506774902, |
|
"logps/chosen": -1.0053439140319824, |
|
"logps/rejected": -1.12282395362854, |
|
"loss": 1.1973, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.010687828063965, |
|
"rewards/margins": 0.23495987057685852, |
|
"rewards/rejected": -2.24564790725708, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6340057636887608, |
|
"grad_norm": 19.633475514009838, |
|
"learning_rate": 4.813986822411833e-08, |
|
"logits/chosen": -2.037318706512451, |
|
"logits/rejected": -2.035334825515747, |
|
"logps/chosen": -1.0152684450149536, |
|
"logps/rejected": -1.0797330141067505, |
|
"loss": 1.2669, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.0305368900299072, |
|
"rewards/margins": 0.12892897427082062, |
|
"rewards/rejected": -2.159466028213501, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.6412103746397695, |
|
"grad_norm": 19.900627573984437, |
|
"learning_rate": 4.805971614785231e-08, |
|
"logits/chosen": -2.0658364295959473, |
|
"logits/rejected": -2.0642929077148438, |
|
"logps/chosen": -1.0170501470565796, |
|
"logps/rejected": -1.11166250705719, |
|
"loss": 1.2213, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.034100294113159, |
|
"rewards/margins": 0.1892244815826416, |
|
"rewards/rejected": -2.22332501411438, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.6484149855907781, |
|
"grad_norm": 20.046745017622534, |
|
"learning_rate": 4.797794306875963e-08, |
|
"logits/chosen": -1.9768317937850952, |
|
"logits/rejected": -1.9782040119171143, |
|
"logps/chosen": -1.1424155235290527, |
|
"logps/rejected": -1.2143452167510986, |
|
"loss": 1.2686, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.2848310470581055, |
|
"rewards/margins": 0.14385904371738434, |
|
"rewards/rejected": -2.4286904335021973, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6556195965417867, |
|
"grad_norm": 20.156486798671747, |
|
"learning_rate": 4.7894554735150076e-08, |
|
"logits/chosen": -1.979318618774414, |
|
"logits/rejected": -1.9829566478729248, |
|
"logps/chosen": -1.042389154434204, |
|
"logps/rejected": -1.108424186706543, |
|
"loss": 1.2626, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.084778308868408, |
|
"rewards/margins": 0.1320703774690628, |
|
"rewards/rejected": -2.216848373413086, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.6628242074927954, |
|
"grad_norm": 23.296556306421977, |
|
"learning_rate": 4.7809557008879185e-08, |
|
"logits/chosen": -2.017183780670166, |
|
"logits/rejected": -2.0119588375091553, |
|
"logps/chosen": -0.9740872383117676, |
|
"logps/rejected": -1.0616848468780518, |
|
"loss": 1.2388, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.9481744766235352, |
|
"rewards/margins": 0.1751951277256012, |
|
"rewards/rejected": -2.1233696937561035, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.670028818443804, |
|
"grad_norm": 18.069785801871536, |
|
"learning_rate": 4.772295586493613e-08, |
|
"logits/chosen": -2.057365894317627, |
|
"logits/rejected": -2.054624080657959, |
|
"logps/chosen": -1.0349071025848389, |
|
"logps/rejected": -1.1510379314422607, |
|
"loss": 1.193, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0698142051696777, |
|
"rewards/margins": 0.23226144909858704, |
|
"rewards/rejected": -2.3020758628845215, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6772334293948127, |
|
"grad_norm": 19.620026043686646, |
|
"learning_rate": 4.763475739102374e-08, |
|
"logits/chosen": -2.00927472114563, |
|
"logits/rejected": -2.015021562576294, |
|
"logps/chosen": -1.1269561052322388, |
|
"logps/rejected": -1.1944589614868164, |
|
"loss": 1.2561, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.2539122104644775, |
|
"rewards/margins": 0.13500596582889557, |
|
"rewards/rejected": -2.388917922973633, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6844380403458213, |
|
"grad_norm": 15.430566823053855, |
|
"learning_rate": 4.754496778713054e-08, |
|
"logits/chosen": -1.9693466424942017, |
|
"logits/rejected": -1.9732694625854492, |
|
"logps/chosen": -1.0118048191070557, |
|
"logps/rejected": -1.1344263553619385, |
|
"loss": 1.2008, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0236096382141113, |
|
"rewards/margins": 0.24524304270744324, |
|
"rewards/rejected": -2.268852710723877, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.69164265129683, |
|
"grad_norm": 21.25135809120288, |
|
"learning_rate": 4.7453593365094926e-08, |
|
"logits/chosen": -2.04045033454895, |
|
"logits/rejected": -2.039541244506836, |
|
"logps/chosen": -1.049902319908142, |
|
"logps/rejected": -1.1590924263000488, |
|
"loss": 1.2091, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.099804639816284, |
|
"rewards/margins": 0.21838030219078064, |
|
"rewards/rejected": -2.3181848526000977, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.6988472622478387, |
|
"grad_norm": 21.39072451404026, |
|
"learning_rate": 4.736064054816145e-08, |
|
"logits/chosen": -2.042609691619873, |
|
"logits/rejected": -2.0387399196624756, |
|
"logps/chosen": -0.9685258865356445, |
|
"logps/rejected": -1.0943108797073364, |
|
"loss": 1.1795, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.937051773071289, |
|
"rewards/margins": 0.25157004594802856, |
|
"rewards/rejected": -2.188621759414673, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7060518731988472, |
|
"grad_norm": 17.20168162072602, |
|
"learning_rate": 4.726611587052933e-08, |
|
"logits/chosen": -1.9772526025772095, |
|
"logits/rejected": -1.9768762588500977, |
|
"logps/chosen": -1.1084102392196655, |
|
"logps/rejected": -1.2353932857513428, |
|
"loss": 1.1801, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.216820478439331, |
|
"rewards/margins": 0.2539660334587097, |
|
"rewards/rejected": -2.4707865715026855, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7132564841498559, |
|
"grad_norm": 22.219628346195623, |
|
"learning_rate": 4.71700259768931e-08, |
|
"logits/chosen": -2.0274641513824463, |
|
"logits/rejected": -2.0244956016540527, |
|
"logps/chosen": -1.109879732131958, |
|
"logps/rejected": -1.2068617343902588, |
|
"loss": 1.2336, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.219759464263916, |
|
"rewards/margins": 0.19396351277828217, |
|
"rewards/rejected": -2.4137234687805176, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7204610951008645, |
|
"grad_norm": 19.81819744621828, |
|
"learning_rate": 4.707237762197549e-08, |
|
"logits/chosen": -2.013184070587158, |
|
"logits/rejected": -2.0100245475769043, |
|
"logps/chosen": -1.0080925226211548, |
|
"logps/rejected": -1.1278679370880127, |
|
"loss": 1.2121, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0161850452423096, |
|
"rewards/margins": 0.23955106735229492, |
|
"rewards/rejected": -2.2557358741760254, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7276657060518732, |
|
"grad_norm": 23.320316952087914, |
|
"learning_rate": 4.697317767005265e-08, |
|
"logits/chosen": -2.0245862007141113, |
|
"logits/rejected": -2.0211946964263916, |
|
"logps/chosen": -1.002010703086853, |
|
"logps/rejected": -1.0942790508270264, |
|
"loss": 1.2568, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.004021406173706, |
|
"rewards/margins": 0.1845366507768631, |
|
"rewards/rejected": -2.1885581016540527, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.7348703170028819, |
|
"grad_norm": 17.35614684932965, |
|
"learning_rate": 4.6872433094471577e-08, |
|
"logits/chosen": -2.0214576721191406, |
|
"logits/rejected": -2.01664137840271, |
|
"logps/chosen": -1.0324314832687378, |
|
"logps/rejected": -1.127612590789795, |
|
"loss": 1.2122, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0648629665374756, |
|
"rewards/margins": 0.19036227464675903, |
|
"rewards/rejected": -2.25522518157959, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.7420749279538905, |
|
"grad_norm": 16.487356163413914, |
|
"learning_rate": 4.677015097715994e-08, |
|
"logits/chosen": -1.9668807983398438, |
|
"logits/rejected": -1.9662902355194092, |
|
"logps/chosen": -1.0229971408843994, |
|
"logps/rejected": -1.1552445888519287, |
|
"loss": 1.1997, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.045994281768799, |
|
"rewards/margins": 0.2644946873188019, |
|
"rewards/rejected": -2.3104891777038574, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.7492795389048992, |
|
"grad_norm": 17.492033929105126, |
|
"learning_rate": 4.666633850812825e-08, |
|
"logits/chosen": -2.0216238498687744, |
|
"logits/rejected": -2.0157718658447266, |
|
"logps/chosen": -1.0129607915878296, |
|
"logps/rejected": -1.0947651863098145, |
|
"loss": 1.2367, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.025921583175659, |
|
"rewards/margins": 0.16360855102539062, |
|
"rewards/rejected": -2.189530372619629, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.7564841498559077, |
|
"grad_norm": 17.49180259130834, |
|
"learning_rate": 4.656100298496439e-08, |
|
"logits/chosen": -1.971518874168396, |
|
"logits/rejected": -1.9679629802703857, |
|
"logps/chosen": -0.9385242462158203, |
|
"logps/rejected": -1.0688835382461548, |
|
"loss": 1.1859, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8770484924316406, |
|
"rewards/margins": 0.26071876287460327, |
|
"rewards/rejected": -2.1377670764923096, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7636887608069164, |
|
"grad_norm": 17.905832545876255, |
|
"learning_rate": 4.6454151812320715e-08, |
|
"logits/chosen": -2.0001180171966553, |
|
"logits/rejected": -1.9940083026885986, |
|
"logps/chosen": -1.03890061378479, |
|
"logps/rejected": -1.1473093032836914, |
|
"loss": 1.2178, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.07780122756958, |
|
"rewards/margins": 0.21681778132915497, |
|
"rewards/rejected": -2.294618606567383, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.770893371757925, |
|
"grad_norm": 20.829212072329433, |
|
"learning_rate": 4.6345792501393434e-08, |
|
"logits/chosen": -2.0026588439941406, |
|
"logits/rejected": -2.0007362365722656, |
|
"logps/chosen": -1.0745230913162231, |
|
"logps/rejected": -1.201542615890503, |
|
"loss": 1.2046, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.1490461826324463, |
|
"rewards/margins": 0.2540392279624939, |
|
"rewards/rejected": -2.403085231781006, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.7780979827089337, |
|
"grad_norm": 20.734671350383845, |
|
"learning_rate": 4.6235932669394676e-08, |
|
"logits/chosen": -2.0293848514556885, |
|
"logits/rejected": -2.030176877975464, |
|
"logps/chosen": -1.0878403186798096, |
|
"logps/rejected": -1.196656584739685, |
|
"loss": 1.2186, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.175680637359619, |
|
"rewards/margins": 0.21763241291046143, |
|
"rewards/rejected": -2.39331316947937, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.7853025936599424, |
|
"grad_norm": 24.317214064629283, |
|
"learning_rate": 4.612458003901698e-08, |
|
"logits/chosen": -2.041074514389038, |
|
"logits/rejected": -2.0332765579223633, |
|
"logps/chosen": -1.109058141708374, |
|
"logps/rejected": -1.2108246088027954, |
|
"loss": 1.2286, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.218116283416748, |
|
"rewards/margins": 0.20353302359580994, |
|
"rewards/rejected": -2.421649217605591, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.792507204610951, |
|
"grad_norm": 23.34024566936978, |
|
"learning_rate": 4.6011742437890476e-08, |
|
"logits/chosen": -2.028428077697754, |
|
"logits/rejected": -2.023019790649414, |
|
"logps/chosen": -1.0458049774169922, |
|
"logps/rejected": -1.1794006824493408, |
|
"loss": 1.1775, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0916099548339844, |
|
"rewards/margins": 0.2671913504600525, |
|
"rewards/rejected": -2.3588013648986816, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7997118155619597, |
|
"grad_norm": 16.933139927466357, |
|
"learning_rate": 4.589742779803259e-08, |
|
"logits/chosen": -2.025526523590088, |
|
"logits/rejected": -2.018397569656372, |
|
"logps/chosen": -1.0093412399291992, |
|
"logps/rejected": -1.129741907119751, |
|
"loss": 1.1948, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0186824798583984, |
|
"rewards/margins": 0.2408013790845871, |
|
"rewards/rejected": -2.259483814239502, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8069164265129684, |
|
"grad_norm": 18.433386982266423, |
|
"learning_rate": 4.5781644155290486e-08, |
|
"logits/chosen": -1.9837512969970703, |
|
"logits/rejected": -1.9759635925292969, |
|
"logps/chosen": -1.047893762588501, |
|
"logps/rejected": -1.1082303524017334, |
|
"loss": 1.2713, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.095787525177002, |
|
"rewards/margins": 0.12067310512065887, |
|
"rewards/rejected": -2.216460704803467, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8141210374639769, |
|
"grad_norm": 18.152544924178944, |
|
"learning_rate": 4.566439964877613e-08, |
|
"logits/chosen": -2.0132524967193604, |
|
"logits/rejected": -2.0092389583587646, |
|
"logps/chosen": -0.9992140531539917, |
|
"logps/rejected": -1.0850255489349365, |
|
"loss": 1.2443, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9984281063079834, |
|
"rewards/margins": 0.17162318527698517, |
|
"rewards/rejected": -2.170051097869873, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.8213256484149856, |
|
"grad_norm": 16.195560643437258, |
|
"learning_rate": 4.554570252029421e-08, |
|
"logits/chosen": -2.0523180961608887, |
|
"logits/rejected": -2.0510833263397217, |
|
"logps/chosen": -1.0488303899765015, |
|
"logps/rejected": -1.1647334098815918, |
|
"loss": 1.201, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.097660779953003, |
|
"rewards/margins": 0.23180584609508514, |
|
"rewards/rejected": -2.3294668197631836, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.8285302593659942, |
|
"grad_norm": 17.9745846350065, |
|
"learning_rate": 4.542556111376274e-08, |
|
"logits/chosen": -2.0492236614227295, |
|
"logits/rejected": -2.0428290367126465, |
|
"logps/chosen": -1.0749974250793457, |
|
"logps/rejected": -1.166634202003479, |
|
"loss": 1.2395, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1499948501586914, |
|
"rewards/margins": 0.18327349424362183, |
|
"rewards/rejected": -2.333268404006958, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.8357348703170029, |
|
"grad_norm": 23.066926614034124, |
|
"learning_rate": 4.5303983874626506e-08, |
|
"logits/chosen": -1.9916588068008423, |
|
"logits/rejected": -1.9900974035263062, |
|
"logps/chosen": -1.0387059450149536, |
|
"logps/rejected": -1.115934133529663, |
|
"loss": 1.2651, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.0774118900299072, |
|
"rewards/margins": 0.15445652604103088, |
|
"rewards/rejected": -2.231868267059326, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.8429394812680115, |
|
"grad_norm": 20.106291828506194, |
|
"learning_rate": 4.518097934926339e-08, |
|
"logits/chosen": -1.995008111000061, |
|
"logits/rejected": -1.9863529205322266, |
|
"logps/chosen": -1.0165393352508545, |
|
"logps/rejected": -1.1260240077972412, |
|
"loss": 1.2057, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.033078670501709, |
|
"rewards/margins": 0.21896927058696747, |
|
"rewards/rejected": -2.2520480155944824, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.8501440922190202, |
|
"grad_norm": 22.8857527390999, |
|
"learning_rate": 4.505655618438363e-08, |
|
"logits/chosen": -1.9628753662109375, |
|
"logits/rejected": -1.9588840007781982, |
|
"logps/chosen": -1.0615794658660889, |
|
"logps/rejected": -1.1646844148635864, |
|
"loss": 1.2307, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1231589317321777, |
|
"rewards/margins": 0.20620973408222198, |
|
"rewards/rejected": -2.329368829727173, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.8573487031700289, |
|
"grad_norm": 17.434899766590377, |
|
"learning_rate": 4.4930723126421945e-08, |
|
"logits/chosen": -2.0546653270721436, |
|
"logits/rejected": -2.047938585281372, |
|
"logps/chosen": -1.0720479488372803, |
|
"logps/rejected": -1.1471359729766846, |
|
"loss": 1.252, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1440958976745605, |
|
"rewards/margins": 0.15017575025558472, |
|
"rewards/rejected": -2.294271945953369, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.8645533141210374, |
|
"grad_norm": 22.14075015263452, |
|
"learning_rate": 4.48034890209227e-08, |
|
"logits/chosen": -1.983888030052185, |
|
"logits/rejected": -1.9716113805770874, |
|
"logps/chosen": -1.0871121883392334, |
|
"logps/rejected": -1.1737545728683472, |
|
"loss": 1.23, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.174224376678467, |
|
"rewards/margins": 0.1732848584651947, |
|
"rewards/rejected": -2.3475091457366943, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.8717579250720461, |
|
"grad_norm": 18.53077050982448, |
|
"learning_rate": 4.4674862811918155e-08, |
|
"logits/chosen": -1.971573829650879, |
|
"logits/rejected": -1.980055570602417, |
|
"logps/chosen": -0.9388012886047363, |
|
"logps/rejected": -1.091797113418579, |
|
"loss": 1.1596, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8776025772094727, |
|
"rewards/margins": 0.30599164962768555, |
|
"rewards/rejected": -2.183594226837158, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.8789625360230547, |
|
"grad_norm": 17.441252552193376, |
|
"learning_rate": 4.454485354129966e-08, |
|
"logits/chosen": -1.9985713958740234, |
|
"logits/rejected": -1.994210958480835, |
|
"logps/chosen": -1.0104329586029053, |
|
"logps/rejected": -1.11543869972229, |
|
"loss": 1.2194, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.0208659172058105, |
|
"rewards/margins": 0.21001139283180237, |
|
"rewards/rejected": -2.23087739944458, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.8861671469740634, |
|
"grad_norm": 17.20275284474546, |
|
"learning_rate": 4.4413470348182124e-08, |
|
"logits/chosen": -1.9755537509918213, |
|
"logits/rejected": -1.9634100198745728, |
|
"logps/chosen": -0.9853811264038086, |
|
"logps/rejected": -1.076774001121521, |
|
"loss": 1.2316, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9707622528076172, |
|
"rewards/margins": 0.18278571963310242, |
|
"rewards/rejected": -2.153548002243042, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.8933717579250721, |
|
"grad_norm": 21.253905408711432, |
|
"learning_rate": 4.42807224682615e-08, |
|
"logits/chosen": -1.9815731048583984, |
|
"logits/rejected": -1.9793331623077393, |
|
"logps/chosen": -0.9373159408569336, |
|
"logps/rejected": -1.0729162693023682, |
|
"loss": 1.181, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.8746318817138672, |
|
"rewards/margins": 0.2712007462978363, |
|
"rewards/rejected": -2.1458325386047363, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9005763688760807, |
|
"grad_norm": 18.803145183231678, |
|
"learning_rate": 4.4146619233165604e-08, |
|
"logits/chosen": -2.0230329036712646, |
|
"logits/rejected": -2.025296688079834, |
|
"logps/chosen": -1.0652821063995361, |
|
"logps/rejected": -1.2190508842468262, |
|
"loss": 1.1677, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.1305642127990723, |
|
"rewards/margins": 0.3075374960899353, |
|
"rewards/rejected": -2.4381017684936523, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9077809798270894, |
|
"grad_norm": 25.018490567837954, |
|
"learning_rate": 4.4011170069798126e-08, |
|
"logits/chosen": -2.020940065383911, |
|
"logits/rejected": -2.025850296020508, |
|
"logps/chosen": -1.1181256771087646, |
|
"logps/rejected": -1.2433640956878662, |
|
"loss": 1.1932, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.2362513542175293, |
|
"rewards/margins": 0.25047701597213745, |
|
"rewards/rejected": -2.4867281913757324, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.9149855907780979, |
|
"grad_norm": 18.123087760553187, |
|
"learning_rate": 4.387438449967594e-08, |
|
"logits/chosen": -1.982254981994629, |
|
"logits/rejected": -1.97560715675354, |
|
"logps/chosen": -0.9658223986625671, |
|
"logps/rejected": -1.085925579071045, |
|
"loss": 1.1909, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.9316447973251343, |
|
"rewards/margins": 0.24020643532276154, |
|
"rewards/rejected": -2.17185115814209, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.9221902017291066, |
|
"grad_norm": 21.17056826903978, |
|
"learning_rate": 4.373627213825983e-08, |
|
"logits/chosen": -2.0719313621520996, |
|
"logits/rejected": -2.0676798820495605, |
|
"logps/chosen": -1.0272830724716187, |
|
"logps/rejected": -1.1627672910690308, |
|
"loss": 1.1829, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0545661449432373, |
|
"rewards/margins": 0.27096837759017944, |
|
"rewards/rejected": -2.3255345821380615, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.9293948126801153, |
|
"grad_norm": 16.73743221772608, |
|
"learning_rate": 4.359684269427848e-08, |
|
"logits/chosen": -2.034970760345459, |
|
"logits/rejected": -2.0339713096618652, |
|
"logps/chosen": -0.9956309199333191, |
|
"logps/rejected": -1.0993244647979736, |
|
"loss": 1.2107, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9912618398666382, |
|
"rewards/margins": 0.20738673210144043, |
|
"rewards/rejected": -2.1986489295959473, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.9365994236311239, |
|
"grad_norm": 23.479698749807888, |
|
"learning_rate": 4.34561059690461e-08, |
|
"logits/chosen": -2.079378843307495, |
|
"logits/rejected": -2.0813305377960205, |
|
"logps/chosen": -1.047837495803833, |
|
"logps/rejected": -1.112128496170044, |
|
"loss": 1.2707, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.095674991607666, |
|
"rewards/margins": 0.12858203053474426, |
|
"rewards/rejected": -2.224256992340088, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.9438040345821326, |
|
"grad_norm": 21.373476828454745, |
|
"learning_rate": 4.3314071855773314e-08, |
|
"logits/chosen": -2.044544219970703, |
|
"logits/rejected": -2.0450897216796875, |
|
"logps/chosen": -0.9845747947692871, |
|
"logps/rejected": -1.0792670249938965, |
|
"loss": 1.2235, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9691495895385742, |
|
"rewards/margins": 0.18938450515270233, |
|
"rewards/rejected": -2.158534049987793, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.9510086455331412, |
|
"grad_norm": 20.390675123621403, |
|
"learning_rate": 4.3170750338871806e-08, |
|
"logits/chosen": -2.0153450965881348, |
|
"logits/rejected": -2.008953094482422, |
|
"logps/chosen": -1.0770236253738403, |
|
"logps/rejected": -1.2197729349136353, |
|
"loss": 1.1662, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.1540472507476807, |
|
"rewards/margins": 0.2854984402656555, |
|
"rewards/rejected": -2.4395458698272705, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.9582132564841499, |
|
"grad_norm": 14.835531781677203, |
|
"learning_rate": 4.3026151493252414e-08, |
|
"logits/chosen": -2.039367437362671, |
|
"logits/rejected": -2.0349154472351074, |
|
"logps/chosen": -1.0609954595565796, |
|
"logps/rejected": -1.1818583011627197, |
|
"loss": 1.2003, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.121990919113159, |
|
"rewards/margins": 0.24172568321228027, |
|
"rewards/rejected": -2.3637166023254395, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.9654178674351584, |
|
"grad_norm": 25.71038185604989, |
|
"learning_rate": 4.2880285483616895e-08, |
|
"logits/chosen": -2.0069704055786133, |
|
"logits/rejected": -2.007664680480957, |
|
"logps/chosen": -1.0175052881240845, |
|
"logps/rejected": -1.1325770616531372, |
|
"loss": 1.2093, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.035010576248169, |
|
"rewards/margins": 0.2301437109708786, |
|
"rewards/rejected": -2.2651541233062744, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.9726224783861671, |
|
"grad_norm": 16.092608904878997, |
|
"learning_rate": 4.273316256374342e-08, |
|
"logits/chosen": -1.9464366436004639, |
|
"logits/rejected": -1.9446899890899658, |
|
"logps/chosen": -1.01396644115448, |
|
"logps/rejected": -1.0869011878967285, |
|
"loss": 1.264, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.02793288230896, |
|
"rewards/margins": 0.1458693891763687, |
|
"rewards/rejected": -2.173802375793457, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9798270893371758, |
|
"grad_norm": 16.212857235886922, |
|
"learning_rate": 4.258479307576576e-08, |
|
"logits/chosen": -1.9840402603149414, |
|
"logits/rejected": -1.9818894863128662, |
|
"logps/chosen": -0.9638694524765015, |
|
"logps/rejected": -1.0546468496322632, |
|
"loss": 1.24, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.927738904953003, |
|
"rewards/margins": 0.18155473470687866, |
|
"rewards/rejected": -2.1092936992645264, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.9870317002881844, |
|
"grad_norm": 21.341000872382455, |
|
"learning_rate": 4.243518744944626e-08, |
|
"logits/chosen": -2.0093555450439453, |
|
"logits/rejected": -2.0047600269317627, |
|
"logps/chosen": -1.0009874105453491, |
|
"logps/rejected": -1.1216598749160767, |
|
"loss": 1.1889, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0019748210906982, |
|
"rewards/margins": 0.24134452641010284, |
|
"rewards/rejected": -2.2433197498321533, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.9942363112391931, |
|
"grad_norm": 20.83793747644969, |
|
"learning_rate": 4.22843562014427e-08, |
|
"logits/chosen": -1.9709367752075195, |
|
"logits/rejected": -1.9672348499298096, |
|
"logps/chosen": -1.0514830350875854, |
|
"logps/rejected": -1.1256954669952393, |
|
"loss": 1.2503, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.102966070175171, |
|
"rewards/margins": 0.14842486381530762, |
|
"rewards/rejected": -2.2513909339904785, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.0014409221902016, |
|
"grad_norm": 28.539886189287515, |
|
"learning_rate": 4.2132309934569e-08, |
|
"logits/chosen": -2.051409959793091, |
|
"logits/rejected": -2.051856517791748, |
|
"logps/chosen": -1.015867829322815, |
|
"logps/rejected": -1.128615379333496, |
|
"loss": 1.2103, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.03173565864563, |
|
"rewards/margins": 0.22549493610858917, |
|
"rewards/rejected": -2.257230758666992, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.0086455331412103, |
|
"grad_norm": 18.4548159325349, |
|
"learning_rate": 4.197905933704989e-08, |
|
"logits/chosen": -1.9460541009902954, |
|
"logits/rejected": -1.9434579610824585, |
|
"logps/chosen": -1.0608787536621094, |
|
"logps/rejected": -1.1942651271820068, |
|
"loss": 1.2017, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1217575073242188, |
|
"rewards/margins": 0.2667728066444397, |
|
"rewards/rejected": -2.3885302543640137, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.015850144092219, |
|
"grad_norm": 23.677146712392545, |
|
"learning_rate": 4.1824615181769577e-08, |
|
"logits/chosen": -1.992706060409546, |
|
"logits/rejected": -1.9971202611923218, |
|
"logps/chosen": -1.0128545761108398, |
|
"logps/rejected": -1.138115644454956, |
|
"loss": 1.2035, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0257091522216797, |
|
"rewards/margins": 0.2505221366882324, |
|
"rewards/rejected": -2.276231288909912, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.0230547550432276, |
|
"grad_norm": 18.466913113268376, |
|
"learning_rate": 4.1668988325514434e-08, |
|
"logits/chosen": -2.0149149894714355, |
|
"logits/rejected": -2.0098109245300293, |
|
"logps/chosen": -1.1170905828475952, |
|
"logps/rejected": -1.2321112155914307, |
|
"loss": 1.2252, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.2341811656951904, |
|
"rewards/margins": 0.2300410270690918, |
|
"rewards/rejected": -2.4642224311828613, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.0302593659942363, |
|
"grad_norm": 20.916480925982736, |
|
"learning_rate": 4.1512189708209844e-08, |
|
"logits/chosen": -2.0576863288879395, |
|
"logits/rejected": -2.0563559532165527, |
|
"logps/chosen": -0.9412269592285156, |
|
"logps/rejected": -1.0276962518692017, |
|
"loss": 1.2464, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.8824539184570312, |
|
"rewards/margins": 0.17293845117092133, |
|
"rewards/rejected": -2.0553925037384033, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.037463976945245, |
|
"grad_norm": 22.10230375057076, |
|
"learning_rate": 4.1354230352151143e-08, |
|
"logits/chosen": -2.009265661239624, |
|
"logits/rejected": -2.002540111541748, |
|
"logps/chosen": -1.138351559638977, |
|
"logps/rejected": -1.2199509143829346, |
|
"loss": 1.2585, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.276703119277954, |
|
"rewards/margins": 0.16319862008094788, |
|
"rewards/rejected": -2.439901828765869, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.0446685878962536, |
|
"grad_norm": 16.842031017248782, |
|
"learning_rate": 4.119512136122882e-08, |
|
"logits/chosen": -2.0774807929992676, |
|
"logits/rejected": -2.086643695831299, |
|
"logps/chosen": -0.9951038360595703, |
|
"logps/rejected": -1.1451139450073242, |
|
"loss": 1.1708, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9902076721191406, |
|
"rewards/margins": 0.30002015829086304, |
|
"rewards/rejected": -2.2902278900146484, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.0518731988472623, |
|
"grad_norm": 15.427164808054908, |
|
"learning_rate": 4.103487392014795e-08, |
|
"logits/chosen": -1.992767095565796, |
|
"logits/rejected": -1.980544090270996, |
|
"logps/chosen": -1.0006814002990723, |
|
"logps/rejected": -1.15886390209198, |
|
"loss": 1.1455, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0013628005981445, |
|
"rewards/margins": 0.31636515259742737, |
|
"rewards/rejected": -2.31772780418396, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.059077809798271, |
|
"grad_norm": 16.81042888795935, |
|
"learning_rate": 4.087349929364192e-08, |
|
"logits/chosen": -2.034682273864746, |
|
"logits/rejected": -2.0252864360809326, |
|
"logps/chosen": -0.9601753354072571, |
|
"logps/rejected": -1.0913857221603394, |
|
"loss": 1.1863, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.9203506708145142, |
|
"rewards/margins": 0.26242080330848694, |
|
"rewards/rejected": -2.1827714443206787, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.0662824207492796, |
|
"grad_norm": 17.584619579081235, |
|
"learning_rate": 4.0711008825680645e-08, |
|
"logits/chosen": -1.979069709777832, |
|
"logits/rejected": -1.97795832157135, |
|
"logps/chosen": -1.0063145160675049, |
|
"logps/rejected": -1.1248080730438232, |
|
"loss": 1.2064, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0126290321350098, |
|
"rewards/margins": 0.2369869500398636, |
|
"rewards/rejected": -2.2496161460876465, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.0734870317002883, |
|
"grad_norm": 19.98068478862068, |
|
"learning_rate": 4.054741393867306e-08, |
|
"logits/chosen": -1.99558424949646, |
|
"logits/rejected": -1.9926925897598267, |
|
"logps/chosen": -1.1117796897888184, |
|
"logps/rejected": -1.1623036861419678, |
|
"loss": 1.2882, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.2235593795776367, |
|
"rewards/margins": 0.10104763507843018, |
|
"rewards/rejected": -2.3246073722839355, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.080691642651297, |
|
"grad_norm": 18.569188294062595, |
|
"learning_rate": 4.038272613266419e-08, |
|
"logits/chosen": -1.9959461688995361, |
|
"logits/rejected": -1.9826500415802002, |
|
"logps/chosen": -1.0095350742340088, |
|
"logps/rejected": -1.1202278137207031, |
|
"loss": 1.2023, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0190701484680176, |
|
"rewards/margins": 0.22138550877571106, |
|
"rewards/rejected": -2.2404556274414062, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0878962536023056, |
|
"grad_norm": 18.129783454014866, |
|
"learning_rate": 4.0216956984526784e-08, |
|
"logits/chosen": -2.04606032371521, |
|
"logits/rejected": -2.047947406768799, |
|
"logps/chosen": -1.0161449909210205, |
|
"logps/rejected": -1.124267339706421, |
|
"loss": 1.2167, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.032289981842041, |
|
"rewards/margins": 0.21624493598937988, |
|
"rewards/rejected": -2.248534679412842, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.0951008645533142, |
|
"grad_norm": 16.171374987629033, |
|
"learning_rate": 4.0050118147147446e-08, |
|
"logits/chosen": -1.9890464544296265, |
|
"logits/rejected": -1.989335298538208, |
|
"logps/chosen": -1.0982977151870728, |
|
"logps/rejected": -1.110621690750122, |
|
"loss": 1.3393, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -2.1965954303741455, |
|
"rewards/margins": 0.02464829757809639, |
|
"rewards/rejected": -2.221243381500244, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.1023054755043227, |
|
"grad_norm": 17.66132069219183, |
|
"learning_rate": 3.988222134860755e-08, |
|
"logits/chosen": -2.0323548316955566, |
|
"logits/rejected": -2.0236430168151855, |
|
"logps/chosen": -0.9508152008056641, |
|
"logps/rejected": -1.1158647537231445, |
|
"loss": 1.1407, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9016304016113281, |
|
"rewards/margins": 0.33009934425354004, |
|
"rewards/rejected": -2.231729507446289, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.1095100864553313, |
|
"grad_norm": 23.676130358664636, |
|
"learning_rate": 3.9713278391358724e-08, |
|
"logits/chosen": -2.0360183715820312, |
|
"logits/rejected": -2.0298333168029785, |
|
"logps/chosen": -1.025137186050415, |
|
"logps/rejected": -1.1484403610229492, |
|
"loss": 1.1877, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.05027437210083, |
|
"rewards/margins": 0.24660632014274597, |
|
"rewards/rejected": -2.2968807220458984, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.11671469740634, |
|
"grad_norm": 17.77840056029204, |
|
"learning_rate": 3.954330115139328e-08, |
|
"logits/chosen": -2.0122570991516113, |
|
"logits/rejected": -2.0070974826812744, |
|
"logps/chosen": -1.0277431011199951, |
|
"logps/rejected": -1.1330978870391846, |
|
"loss": 1.2216, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0554862022399902, |
|
"rewards/margins": 0.21070995926856995, |
|
"rewards/rejected": -2.266195774078369, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.1239193083573487, |
|
"grad_norm": 25.812098081681867, |
|
"learning_rate": 3.937230157740931e-08, |
|
"logits/chosen": -2.067347764968872, |
|
"logits/rejected": -2.0611376762390137, |
|
"logps/chosen": -1.0478734970092773, |
|
"logps/rejected": -1.1832859516143799, |
|
"loss": 1.1824, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0957469940185547, |
|
"rewards/margins": 0.2708250880241394, |
|
"rewards/rejected": -2.3665719032287598, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.1311239193083573, |
|
"grad_norm": 16.22328310375803, |
|
"learning_rate": 3.920029168997077e-08, |
|
"logits/chosen": -2.04835844039917, |
|
"logits/rejected": -2.04648494720459, |
|
"logps/chosen": -1.0037837028503418, |
|
"logps/rejected": -1.131502628326416, |
|
"loss": 1.1863, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0075674057006836, |
|
"rewards/margins": 0.25543779134750366, |
|
"rewards/rejected": -2.263005256652832, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.138328530259366, |
|
"grad_norm": 29.81353401958458, |
|
"learning_rate": 3.9027283580662476e-08, |
|
"logits/chosen": -2.0225307941436768, |
|
"logits/rejected": -2.0166521072387695, |
|
"logps/chosen": -1.0478241443634033, |
|
"logps/rejected": -1.1936235427856445, |
|
"loss": 1.1765, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0956482887268066, |
|
"rewards/margins": 0.2915985882282257, |
|
"rewards/rejected": -2.387247085571289, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.1455331412103746, |
|
"grad_norm": 16.941588748106863, |
|
"learning_rate": 3.885328941124014e-08, |
|
"logits/chosen": -1.991965889930725, |
|
"logits/rejected": -1.9873225688934326, |
|
"logps/chosen": -0.9666957855224609, |
|
"logps/rejected": -1.1006277799606323, |
|
"loss": 1.1706, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.9333915710449219, |
|
"rewards/margins": 0.26786428689956665, |
|
"rewards/rejected": -2.2012555599212646, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.1527377521613833, |
|
"grad_norm": 20.82364621838478, |
|
"learning_rate": 3.867832141277539e-08, |
|
"logits/chosen": -2.0321202278137207, |
|
"logits/rejected": -2.0232601165771484, |
|
"logps/chosen": -1.0682156085968018, |
|
"logps/rejected": -1.180410623550415, |
|
"loss": 1.2096, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.1364312171936035, |
|
"rewards/margins": 0.2243901491165161, |
|
"rewards/rejected": -2.36082124710083, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.159942363112392, |
|
"grad_norm": 20.912686096120964, |
|
"learning_rate": 3.850239188479606e-08, |
|
"logits/chosen": -1.9847033023834229, |
|
"logits/rejected": -1.9881378412246704, |
|
"logps/chosen": -1.0096313953399658, |
|
"logps/rejected": -1.1002733707427979, |
|
"loss": 1.2372, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0192627906799316, |
|
"rewards/margins": 0.18128342926502228, |
|
"rewards/rejected": -2.2005467414855957, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.1671469740634006, |
|
"grad_norm": 21.899733424702635, |
|
"learning_rate": 3.832551319442151e-08, |
|
"logits/chosen": -2.0586349964141846, |
|
"logits/rejected": -2.059906482696533, |
|
"logps/chosen": -1.057755708694458, |
|
"logps/rejected": -1.184890627861023, |
|
"loss": 1.1897, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.115511417388916, |
|
"rewards/margins": 0.254270076751709, |
|
"rewards/rejected": -2.369781255722046, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.1743515850144093, |
|
"grad_norm": 17.2910410178799, |
|
"learning_rate": 3.81476977754933e-08, |
|
"logits/chosen": -1.9560763835906982, |
|
"logits/rejected": -1.9524368047714233, |
|
"logps/chosen": -1.0269958972930908, |
|
"logps/rejected": -1.0969812870025635, |
|
"loss": 1.2579, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0539917945861816, |
|
"rewards/margins": 0.13997015357017517, |
|
"rewards/rejected": -2.193962574005127, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.181556195965418, |
|
"grad_norm": 16.85537517324203, |
|
"learning_rate": 3.796895812770114e-08, |
|
"logits/chosen": -1.9784526824951172, |
|
"logits/rejected": -1.9793262481689453, |
|
"logps/chosen": -1.0173704624176025, |
|
"logps/rejected": -1.1091585159301758, |
|
"loss": 1.2408, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.034740924835205, |
|
"rewards/margins": 0.1835760474205017, |
|
"rewards/rejected": -2.2183170318603516, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.1887608069164266, |
|
"grad_norm": 22.175531020521074, |
|
"learning_rate": 3.7789306815704216e-08, |
|
"logits/chosen": -2.009108781814575, |
|
"logits/rejected": -2.006824254989624, |
|
"logps/chosen": -1.0072455406188965, |
|
"logps/rejected": -1.0781667232513428, |
|
"loss": 1.2618, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.014491081237793, |
|
"rewards/margins": 0.14184223115444183, |
|
"rewards/rejected": -2.1563334465026855, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.195965417867435, |
|
"grad_norm": 18.89404553225258, |
|
"learning_rate": 3.760875646824795e-08, |
|
"logits/chosen": -1.932428002357483, |
|
"logits/rejected": -1.9363291263580322, |
|
"logps/chosen": -0.9747514724731445, |
|
"logps/rejected": -1.0793020725250244, |
|
"loss": 1.2232, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.949502944946289, |
|
"rewards/margins": 0.20910124480724335, |
|
"rewards/rejected": -2.158604145050049, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.2031700288184437, |
|
"grad_norm": 22.227342153467788, |
|
"learning_rate": 3.742731977727623e-08, |
|
"logits/chosen": -2.0331270694732666, |
|
"logits/rejected": -2.0301709175109863, |
|
"logps/chosen": -1.0413671731948853, |
|
"logps/rejected": -1.1772552728652954, |
|
"loss": 1.1795, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0827343463897705, |
|
"rewards/margins": 0.27177631855010986, |
|
"rewards/rejected": -2.354510545730591, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.2103746397694524, |
|
"grad_norm": 19.70333261721218, |
|
"learning_rate": 3.7245009497039244e-08, |
|
"logits/chosen": -1.970715880393982, |
|
"logits/rejected": -1.9627761840820312, |
|
"logps/chosen": -1.0116103887557983, |
|
"logps/rejected": -1.1484659910202026, |
|
"loss": 1.1716, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0232207775115967, |
|
"rewards/margins": 0.2737112045288086, |
|
"rewards/rejected": -2.2969319820404053, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.217579250720461, |
|
"grad_norm": 18.86128397711634, |
|
"learning_rate": 3.7061838443196886e-08, |
|
"logits/chosen": -2.0151665210723877, |
|
"logits/rejected": -2.016679048538208, |
|
"logps/chosen": -1.026761531829834, |
|
"logps/rejected": -1.150320053100586, |
|
"loss": 1.1888, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.053523063659668, |
|
"rewards/margins": 0.2471170723438263, |
|
"rewards/rejected": -2.300640106201172, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.2247838616714697, |
|
"grad_norm": 22.70930817597516, |
|
"learning_rate": 3.68778194919179e-08, |
|
"logits/chosen": -1.983304738998413, |
|
"logits/rejected": -1.984287977218628, |
|
"logps/chosen": -1.0792837142944336, |
|
"logps/rejected": -1.2009527683258057, |
|
"loss": 1.1955, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.158567428588867, |
|
"rewards/margins": 0.24333825707435608, |
|
"rewards/rejected": -2.4019055366516113, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.2319884726224783, |
|
"grad_norm": 20.280682845222326, |
|
"learning_rate": 3.66929655789747e-08, |
|
"logits/chosen": -2.0337467193603516, |
|
"logits/rejected": -2.0225701332092285, |
|
"logps/chosen": -0.9402590990066528, |
|
"logps/rejected": -1.0919773578643799, |
|
"loss": 1.1634, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.8805181980133057, |
|
"rewards/margins": 0.30343663692474365, |
|
"rewards/rejected": -2.1839547157287598, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.239193083573487, |
|
"grad_norm": 16.359613747281564, |
|
"learning_rate": 3.6507289698834064e-08, |
|
"logits/chosen": -1.9774224758148193, |
|
"logits/rejected": -1.973842978477478, |
|
"logps/chosen": -0.98408442735672, |
|
"logps/rejected": -1.1161837577819824, |
|
"loss": 1.196, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.96816885471344, |
|
"rewards/margins": 0.2641984820365906, |
|
"rewards/rejected": -2.232367515563965, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.2463976945244957, |
|
"grad_norm": 25.191044914408238, |
|
"learning_rate": 3.6320804903743684e-08, |
|
"logits/chosen": -2.026642322540283, |
|
"logits/rejected": -2.0262598991394043, |
|
"logps/chosen": -1.0340476036071777, |
|
"logps/rejected": -1.1598145961761475, |
|
"loss": 1.1976, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0680952072143555, |
|
"rewards/margins": 0.251534104347229, |
|
"rewards/rejected": -2.319629192352295, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.2536023054755043, |
|
"grad_norm": 17.23248745457562, |
|
"learning_rate": 3.61335243028146e-08, |
|
"logits/chosen": -2.0114941596984863, |
|
"logits/rejected": -2.016153573989868, |
|
"logps/chosen": -1.092045783996582, |
|
"logps/rejected": -1.2228668928146362, |
|
"loss": 1.1916, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.184091567993164, |
|
"rewards/margins": 0.2616419494152069, |
|
"rewards/rejected": -2.4457337856292725, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.260806916426513, |
|
"grad_norm": 18.437527072676268, |
|
"learning_rate": 3.5945461061099736e-08, |
|
"logits/chosen": -1.972100853919983, |
|
"logits/rejected": -1.9586093425750732, |
|
"logps/chosen": -1.0443975925445557, |
|
"logps/rejected": -1.1218501329421997, |
|
"loss": 1.2706, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.0887951850891113, |
|
"rewards/margins": 0.15490522980690002, |
|
"rewards/rejected": -2.2437002658843994, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.2680115273775217, |
|
"grad_norm": 19.695402848445642, |
|
"learning_rate": 3.5756628398668446e-08, |
|
"logits/chosen": -2.0573649406433105, |
|
"logits/rejected": -2.0625429153442383, |
|
"logps/chosen": -1.1325814723968506, |
|
"logps/rejected": -1.232399821281433, |
|
"loss": 1.2403, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.265162944793701, |
|
"rewards/margins": 0.19963672757148743, |
|
"rewards/rejected": -2.464799642562866, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.2752161383285303, |
|
"grad_norm": 17.758331420145563, |
|
"learning_rate": 3.556703958967716e-08, |
|
"logits/chosen": -2.041581630706787, |
|
"logits/rejected": -2.036958694458008, |
|
"logps/chosen": -1.0513150691986084, |
|
"logps/rejected": -1.1853423118591309, |
|
"loss": 1.1882, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.102630138397217, |
|
"rewards/margins": 0.2680542469024658, |
|
"rewards/rejected": -2.3706846237182617, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.282420749279539, |
|
"grad_norm": 24.11832525210908, |
|
"learning_rate": 3.5376707961436297e-08, |
|
"logits/chosen": -2.025054454803467, |
|
"logits/rejected": -2.019120693206787, |
|
"logps/chosen": -1.1408239603042603, |
|
"logps/rejected": -1.202470064163208, |
|
"loss": 1.2726, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.2816479206085205, |
|
"rewards/margins": 0.12329187244176865, |
|
"rewards/rejected": -2.404940128326416, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.2896253602305476, |
|
"grad_norm": 12.89708045158757, |
|
"learning_rate": 3.51856468934734e-08, |
|
"logits/chosen": -1.9773098230361938, |
|
"logits/rejected": -1.9786754846572876, |
|
"logps/chosen": -0.9762522578239441, |
|
"logps/rejected": -1.0697864294052124, |
|
"loss": 1.2234, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9525045156478882, |
|
"rewards/margins": 0.18706828355789185, |
|
"rewards/rejected": -2.139572858810425, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.2968299711815563, |
|
"grad_norm": 20.10190857160128, |
|
"learning_rate": 3.499386981659262e-08, |
|
"logits/chosen": -2.0595974922180176, |
|
"logits/rejected": -2.0540311336517334, |
|
"logps/chosen": -1.0190843343734741, |
|
"logps/rejected": -1.208898901939392, |
|
"loss": 1.1243, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0381686687469482, |
|
"rewards/margins": 0.3796289563179016, |
|
"rewards/rejected": -2.417797803878784, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.304034582132565, |
|
"grad_norm": 20.878615577501385, |
|
"learning_rate": 3.480139021193057e-08, |
|
"logits/chosen": -1.9839977025985718, |
|
"logits/rejected": -1.9858938455581665, |
|
"logps/chosen": -0.9964865446090698, |
|
"logps/rejected": -1.1170662641525269, |
|
"loss": 1.212, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9929730892181396, |
|
"rewards/margins": 0.24115952849388123, |
|
"rewards/rejected": -2.2341325283050537, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.3112391930835736, |
|
"grad_norm": 28.47013732688272, |
|
"learning_rate": 3.4608221610008666e-08, |
|
"logits/chosen": -2.0153214931488037, |
|
"logits/rejected": -2.010758876800537, |
|
"logps/chosen": -0.9736091494560242, |
|
"logps/rejected": -1.120499849319458, |
|
"loss": 1.1707, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9472182989120483, |
|
"rewards/margins": 0.29378125071525574, |
|
"rewards/rejected": -2.240999698638916, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.318443804034582, |
|
"grad_norm": 15.221657015785182, |
|
"learning_rate": 3.4414377589782e-08, |
|
"logits/chosen": -1.9868896007537842, |
|
"logits/rejected": -1.9957456588745117, |
|
"logps/chosen": -1.0180634260177612, |
|
"logps/rejected": -1.150467872619629, |
|
"loss": 1.1966, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0361268520355225, |
|
"rewards/margins": 0.26480910181999207, |
|
"rewards/rejected": -2.300935745239258, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.3256484149855907, |
|
"grad_norm": 18.190653029469026, |
|
"learning_rate": 3.4219871777684745e-08, |
|
"logits/chosen": -1.9982116222381592, |
|
"logits/rejected": -1.9859825372695923, |
|
"logps/chosen": -0.9929243922233582, |
|
"logps/rejected": -1.1142441034317017, |
|
"loss": 1.2076, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9858487844467163, |
|
"rewards/margins": 0.24263925850391388, |
|
"rewards/rejected": -2.2284882068634033, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.3328530259365994, |
|
"grad_norm": 17.791029774645512, |
|
"learning_rate": 3.4024717846672364e-08, |
|
"logits/chosen": -2.0318691730499268, |
|
"logits/rejected": -2.025087833404541, |
|
"logps/chosen": -0.9934013485908508, |
|
"logps/rejected": -1.1215975284576416, |
|
"loss": 1.1959, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.9868026971817017, |
|
"rewards/margins": 0.25639256834983826, |
|
"rewards/rejected": -2.243195056915283, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.340057636887608, |
|
"grad_norm": 17.7783196169273, |
|
"learning_rate": 3.382892951526036e-08, |
|
"logits/chosen": -2.0219979286193848, |
|
"logits/rejected": -2.0191798210144043, |
|
"logps/chosen": -1.0518848896026611, |
|
"logps/rejected": -1.20078444480896, |
|
"loss": 1.1628, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.1037697792053223, |
|
"rewards/margins": 0.2977990508079529, |
|
"rewards/rejected": -2.40156888961792, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.3472622478386167, |
|
"grad_norm": 20.2114199388819, |
|
"learning_rate": 3.3632520546559974e-08, |
|
"logits/chosen": -1.9855273962020874, |
|
"logits/rejected": -1.9737205505371094, |
|
"logps/chosen": -0.926679253578186, |
|
"logps/rejected": -1.0955464839935303, |
|
"loss": 1.1271, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.853358507156372, |
|
"rewards/margins": 0.3377344310283661, |
|
"rewards/rejected": -2.1910929679870605, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.3544668587896254, |
|
"grad_norm": 19.579421951203443, |
|
"learning_rate": 3.34355047473107e-08, |
|
"logits/chosen": -1.9991194009780884, |
|
"logits/rejected": -1.9949671030044556, |
|
"logps/chosen": -1.0290135145187378, |
|
"logps/rejected": -1.1186621189117432, |
|
"loss": 1.2445, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0580270290374756, |
|
"rewards/margins": 0.1792970895767212, |
|
"rewards/rejected": -2.2373242378234863, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.361671469740634, |
|
"grad_norm": 22.936331468503273, |
|
"learning_rate": 3.323789596690971e-08, |
|
"logits/chosen": -1.966144323348999, |
|
"logits/rejected": -1.9670454263687134, |
|
"logps/chosen": -1.0209920406341553, |
|
"logps/rejected": -1.1551681756973267, |
|
"loss": 1.1787, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0419840812683105, |
|
"rewards/margins": 0.26835212111473083, |
|
"rewards/rejected": -2.3103363513946533, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.3688760806916427, |
|
"grad_norm": 15.801647380635032, |
|
"learning_rate": 3.303970809643828e-08, |
|
"logits/chosen": -1.998286247253418, |
|
"logits/rejected": -2.0028045177459717, |
|
"logps/chosen": -1.0353937149047852, |
|
"logps/rejected": -1.1643174886703491, |
|
"loss": 1.1927, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0707874298095703, |
|
"rewards/margins": 0.25784778594970703, |
|
"rewards/rejected": -2.3286349773406982, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.3760806916426513, |
|
"grad_norm": 20.693514419325513, |
|
"learning_rate": 3.2840955067685356e-08, |
|
"logits/chosen": -2.031480312347412, |
|
"logits/rejected": -2.035548686981201, |
|
"logps/chosen": -1.0550123453140259, |
|
"logps/rejected": -1.2029016017913818, |
|
"loss": 1.1631, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.1100246906280518, |
|
"rewards/margins": 0.29577863216400146, |
|
"rewards/rejected": -2.4058032035827637, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.38328530259366, |
|
"grad_norm": 16.858093329362955, |
|
"learning_rate": 3.264165085216817e-08, |
|
"logits/chosen": -2.038879871368408, |
|
"logits/rejected": -2.0388429164886475, |
|
"logps/chosen": -0.9352089166641235, |
|
"logps/rejected": -1.10355544090271, |
|
"loss": 1.1401, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.870417833328247, |
|
"rewards/margins": 0.33669325709342957, |
|
"rewards/rejected": -2.20711088180542, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.3904899135446687, |
|
"grad_norm": 18.690349536010206, |
|
"learning_rate": 3.244180946015008e-08, |
|
"logits/chosen": -1.966835618019104, |
|
"logits/rejected": -1.967462182044983, |
|
"logps/chosen": -1.0351486206054688, |
|
"logps/rejected": -1.0991723537445068, |
|
"loss": 1.273, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.0702972412109375, |
|
"rewards/margins": 0.12804751098155975, |
|
"rewards/rejected": -2.1983447074890137, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.397694524495677, |
|
"grad_norm": 15.348372078288971, |
|
"learning_rate": 3.224144493965578e-08, |
|
"logits/chosen": -2.0523886680603027, |
|
"logits/rejected": -2.0558857917785645, |
|
"logps/chosen": -0.9908173680305481, |
|
"logps/rejected": -1.0954809188842773, |
|
"loss": 1.2177, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.9816347360610962, |
|
"rewards/margins": 0.20932729542255402, |
|
"rewards/rejected": -2.1909618377685547, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.4048991354466858, |
|
"grad_norm": 17.879874010257755, |
|
"learning_rate": 3.204057137548371e-08, |
|
"logits/chosen": -2.014993667602539, |
|
"logits/rejected": -2.0096094608306885, |
|
"logps/chosen": -0.9776601791381836, |
|
"logps/rejected": -1.0827131271362305, |
|
"loss": 1.2165, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9553203582763672, |
|
"rewards/margins": 0.2101059854030609, |
|
"rewards/rejected": -2.165426254272461, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.4121037463976944, |
|
"grad_norm": 19.498418734777132, |
|
"learning_rate": 3.183920288821597e-08, |
|
"logits/chosen": -1.9974403381347656, |
|
"logits/rejected": -1.9938675165176392, |
|
"logps/chosen": -1.002251386642456, |
|
"logps/rejected": -1.1633012294769287, |
|
"loss": 1.1473, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.004502773284912, |
|
"rewards/margins": 0.32209956645965576, |
|
"rewards/rejected": -2.3266024589538574, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.419308357348703, |
|
"grad_norm": 23.526801289262714, |
|
"learning_rate": 3.1637353633225735e-08, |
|
"logits/chosen": -2.0382392406463623, |
|
"logits/rejected": -2.0321145057678223, |
|
"logps/chosen": -1.0285365581512451, |
|
"logps/rejected": -1.1749916076660156, |
|
"loss": 1.1708, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0570731163024902, |
|
"rewards/margins": 0.2929099202156067, |
|
"rewards/rejected": -2.3499832153320312, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.4265129682997117, |
|
"grad_norm": 19.581143803282398, |
|
"learning_rate": 3.143503779968213e-08, |
|
"logits/chosen": -2.0113444328308105, |
|
"logits/rejected": -2.011580467224121, |
|
"logps/chosen": -1.0159164667129517, |
|
"logps/rejected": -1.1518559455871582, |
|
"loss": 1.196, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0318329334259033, |
|
"rewards/margins": 0.2718789875507355, |
|
"rewards/rejected": -2.3037118911743164, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.4337175792507204, |
|
"grad_norm": 18.05404403193421, |
|
"learning_rate": 3.1232269609552875e-08, |
|
"logits/chosen": -1.9945173263549805, |
|
"logits/rejected": -1.9919058084487915, |
|
"logps/chosen": -0.9980915188789368, |
|
"logps/rejected": -1.1186559200286865, |
|
"loss": 1.2014, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9961830377578735, |
|
"rewards/margins": 0.24112899601459503, |
|
"rewards/rejected": -2.237311840057373, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.440922190201729, |
|
"grad_norm": 16.09307467422962, |
|
"learning_rate": 3.102906331660444e-08, |
|
"logits/chosen": -2.0536019802093506, |
|
"logits/rejected": -2.045327663421631, |
|
"logps/chosen": -0.9929038882255554, |
|
"logps/rejected": -1.1643650531768799, |
|
"loss": 1.1353, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9858077764511108, |
|
"rewards/margins": 0.34292247891426086, |
|
"rewards/rejected": -2.3287301063537598, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4481268011527377, |
|
"grad_norm": 16.081259631225404, |
|
"learning_rate": 3.082543320540015e-08, |
|
"logits/chosen": -1.9962193965911865, |
|
"logits/rejected": -1.9890375137329102, |
|
"logps/chosen": -1.0065879821777344, |
|
"logps/rejected": -1.1499404907226562, |
|
"loss": 1.1679, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0131759643554688, |
|
"rewards/margins": 0.2867050766944885, |
|
"rewards/rejected": -2.2998809814453125, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.4553314121037464, |
|
"grad_norm": 18.028275293304183, |
|
"learning_rate": 3.062139359029599e-08, |
|
"logits/chosen": -2.031736373901367, |
|
"logits/rejected": -2.0316202640533447, |
|
"logps/chosen": -1.0291239023208618, |
|
"logps/rejected": -1.1133326292037964, |
|
"loss": 1.2476, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.0582478046417236, |
|
"rewards/margins": 0.16841746866703033, |
|
"rewards/rejected": -2.2266652584075928, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.462536023054755, |
|
"grad_norm": 18.739986191205507, |
|
"learning_rate": 3.041695881443437e-08, |
|
"logits/chosen": -2.0472700595855713, |
|
"logits/rejected": -2.0425424575805664, |
|
"logps/chosen": -0.9730477333068848, |
|
"logps/rejected": -1.1086480617523193, |
|
"loss": 1.1771, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.9460954666137695, |
|
"rewards/margins": 0.271200567483902, |
|
"rewards/rejected": -2.2172961235046387, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.4697406340057637, |
|
"grad_norm": 22.13845084834241, |
|
"learning_rate": 3.0212143248735886e-08, |
|
"logits/chosen": -2.0294270515441895, |
|
"logits/rejected": -2.029846668243408, |
|
"logps/chosen": -0.9991294741630554, |
|
"logps/rejected": -1.1360986232757568, |
|
"loss": 1.1761, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9982589483261108, |
|
"rewards/margins": 0.27393826842308044, |
|
"rewards/rejected": -2.2721972465515137, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.4769452449567724, |
|
"grad_norm": 19.822466302624346, |
|
"learning_rate": 3.0006961290889077e-08, |
|
"logits/chosen": -2.0190889835357666, |
|
"logits/rejected": -2.0099661350250244, |
|
"logps/chosen": -1.1185331344604492, |
|
"logps/rejected": -1.286892056465149, |
|
"loss": 1.1647, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.2370662689208984, |
|
"rewards/margins": 0.33671754598617554, |
|
"rewards/rejected": -2.573784112930298, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.484149855907781, |
|
"grad_norm": 21.925715491881135, |
|
"learning_rate": 2.980142736433833e-08, |
|
"logits/chosen": -2.01119327545166, |
|
"logits/rejected": -2.004316806793213, |
|
"logps/chosen": -1.0309051275253296, |
|
"logps/rejected": -1.0949158668518066, |
|
"loss": 1.2751, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.061810255050659, |
|
"rewards/margins": 0.12802138924598694, |
|
"rewards/rejected": -2.1898317337036133, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.4913544668587897, |
|
"grad_norm": 24.46772736032293, |
|
"learning_rate": 2.9595555917269997e-08, |
|
"logits/chosen": -2.03961181640625, |
|
"logits/rejected": -2.0247714519500732, |
|
"logps/chosen": -1.1411329507827759, |
|
"logps/rejected": -1.2373685836791992, |
|
"loss": 1.2153, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.2822659015655518, |
|
"rewards/margins": 0.19247153401374817, |
|
"rewards/rejected": -2.4747371673583984, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.4985590778097984, |
|
"grad_norm": 18.449968646671344, |
|
"learning_rate": 2.9389361421596725e-08, |
|
"logits/chosen": -1.9533805847167969, |
|
"logits/rejected": -1.9556515216827393, |
|
"logps/chosen": -1.0595835447311401, |
|
"logps/rejected": -1.1933083534240723, |
|
"loss": 1.1832, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.1191670894622803, |
|
"rewards/margins": 0.2674497365951538, |
|
"rewards/rejected": -2.3866167068481445, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.505763688760807, |
|
"grad_norm": 20.34455177562933, |
|
"learning_rate": 2.9182858371940126e-08, |
|
"logits/chosen": -2.0372543334960938, |
|
"logits/rejected": -2.031832218170166, |
|
"logps/chosen": -1.0473064184188843, |
|
"logps/rejected": -1.1757621765136719, |
|
"loss": 1.188, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0946128368377686, |
|
"rewards/margins": 0.25691163539886475, |
|
"rewards/rejected": -2.3515243530273438, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.5129682997118157, |
|
"grad_norm": 18.90759740416456, |
|
"learning_rate": 2.8976061284611908e-08, |
|
"logits/chosen": -1.9889026880264282, |
|
"logits/rejected": -1.9977174997329712, |
|
"logps/chosen": -0.9364235997200012, |
|
"logps/rejected": -1.0655431747436523, |
|
"loss": 1.1944, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.8728471994400024, |
|
"rewards/margins": 0.2582393288612366, |
|
"rewards/rejected": -2.1310863494873047, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.5201729106628243, |
|
"grad_norm": 21.434032214198695, |
|
"learning_rate": 2.8768984696593384e-08, |
|
"logits/chosen": -1.9844554662704468, |
|
"logits/rejected": -1.974907636642456, |
|
"logps/chosen": -1.0168864727020264, |
|
"logps/rejected": -1.1343626976013184, |
|
"loss": 1.2154, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0337729454040527, |
|
"rewards/margins": 0.23495233058929443, |
|
"rewards/rejected": -2.2687253952026367, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.527377521613833, |
|
"grad_norm": 18.047284778863265, |
|
"learning_rate": 2.8561643164513637e-08, |
|
"logits/chosen": -1.9067440032958984, |
|
"logits/rejected": -1.9029529094696045, |
|
"logps/chosen": -1.0492842197418213, |
|
"logps/rejected": -1.1676268577575684, |
|
"loss": 1.2013, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0985684394836426, |
|
"rewards/margins": 0.23668520152568817, |
|
"rewards/rejected": -2.3352537155151367, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.5345821325648417, |
|
"grad_norm": 18.95635925899202, |
|
"learning_rate": 2.8354051263626227e-08, |
|
"logits/chosen": -1.9887434244155884, |
|
"logits/rejected": -1.994476556777954, |
|
"logps/chosen": -1.0596574544906616, |
|
"logps/rejected": -1.1733436584472656, |
|
"loss": 1.206, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1193149089813232, |
|
"rewards/margins": 0.22737233340740204, |
|
"rewards/rejected": -2.3466873168945312, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.54178674351585, |
|
"grad_norm": 19.584229312796637, |
|
"learning_rate": 2.8146223586784573e-08, |
|
"logits/chosen": -1.9805008172988892, |
|
"logits/rejected": -1.9726234674453735, |
|
"logps/chosen": -1.0646823644638062, |
|
"logps/rejected": -1.1987252235412598, |
|
"loss": 1.1873, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1293647289276123, |
|
"rewards/margins": 0.26808565855026245, |
|
"rewards/rejected": -2.3974504470825195, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.5489913544668588, |
|
"grad_norm": 25.50415369546022, |
|
"learning_rate": 2.7938174743416205e-08, |
|
"logits/chosen": -1.9369735717773438, |
|
"logits/rejected": -1.933683156967163, |
|
"logps/chosen": -1.050445795059204, |
|
"logps/rejected": -1.1607348918914795, |
|
"loss": 1.2135, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.100891590118408, |
|
"rewards/margins": 0.220577672123909, |
|
"rewards/rejected": -2.321469783782959, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.5561959654178674, |
|
"grad_norm": 19.684619038178205, |
|
"learning_rate": 2.7729919358495728e-08, |
|
"logits/chosen": -2.005277395248413, |
|
"logits/rejected": -2.0062077045440674, |
|
"logps/chosen": -1.1121950149536133, |
|
"logps/rejected": -1.19098699092865, |
|
"loss": 1.2586, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.2243900299072266, |
|
"rewards/margins": 0.1575840413570404, |
|
"rewards/rejected": -2.3819739818573, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.563400576368876, |
|
"grad_norm": 19.293166467927325, |
|
"learning_rate": 2.7521472071516772e-08, |
|
"logits/chosen": -2.0027170181274414, |
|
"logits/rejected": -2.0016961097717285, |
|
"logps/chosen": -0.9449695348739624, |
|
"logps/rejected": -1.0605154037475586, |
|
"loss": 1.2076, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.8899390697479248, |
|
"rewards/margins": 0.2310914546251297, |
|
"rewards/rejected": -2.121030807495117, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.5706051873198847, |
|
"grad_norm": 22.062496687144794, |
|
"learning_rate": 2.731284753546289e-08, |
|
"logits/chosen": -1.9856891632080078, |
|
"logits/rejected": -1.9836666584014893, |
|
"logps/chosen": -1.081839919090271, |
|
"logps/rejected": -1.2224990129470825, |
|
"loss": 1.1741, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.163679838180542, |
|
"rewards/margins": 0.2813180387020111, |
|
"rewards/rejected": -2.444998025894165, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.5778097982708934, |
|
"grad_norm": 21.803351526445823, |
|
"learning_rate": 2.710406041577751e-08, |
|
"logits/chosen": -2.04976224899292, |
|
"logits/rejected": -2.0463500022888184, |
|
"logps/chosen": -1.0325794219970703, |
|
"logps/rejected": -1.1858645677566528, |
|
"loss": 1.1631, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0651588439941406, |
|
"rewards/margins": 0.3065701127052307, |
|
"rewards/rejected": -2.3717291355133057, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.585014409221902, |
|
"grad_norm": 18.0281741107113, |
|
"learning_rate": 2.6895125389333017e-08, |
|
"logits/chosen": -2.0131421089172363, |
|
"logits/rejected": -2.0089025497436523, |
|
"logps/chosen": -1.0270203351974487, |
|
"logps/rejected": -1.177971363067627, |
|
"loss": 1.1622, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0540406703948975, |
|
"rewards/margins": 0.3019018769264221, |
|
"rewards/rejected": -2.355942726135254, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.5922190201729105, |
|
"grad_norm": 17.129921710950377, |
|
"learning_rate": 2.6686057143399028e-08, |
|
"logits/chosen": -2.010429620742798, |
|
"logits/rejected": -2.0119571685791016, |
|
"logps/chosen": -1.0614731311798096, |
|
"logps/rejected": -1.1593468189239502, |
|
"loss": 1.2433, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.122946262359619, |
|
"rewards/margins": 0.1957472264766693, |
|
"rewards/rejected": -2.3186936378479004, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.5994236311239192, |
|
"grad_norm": 19.402506516811066, |
|
"learning_rate": 2.647687037460996e-08, |
|
"logits/chosen": -2.0160350799560547, |
|
"logits/rejected": -2.0153958797454834, |
|
"logps/chosen": -1.0873758792877197, |
|
"logps/rejected": -1.2827941179275513, |
|
"loss": 1.1246, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.1747517585754395, |
|
"rewards/margins": 0.3908364176750183, |
|
"rewards/rejected": -2.5655882358551025, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.6066282420749278, |
|
"grad_norm": 20.370963061014333, |
|
"learning_rate": 2.626757978793187e-08, |
|
"logits/chosen": -2.0244648456573486, |
|
"logits/rejected": -2.0181853771209717, |
|
"logps/chosen": -1.0852004289627075, |
|
"logps/rejected": -1.2089064121246338, |
|
"loss": 1.2036, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.170400857925415, |
|
"rewards/margins": 0.24741193652153015, |
|
"rewards/rejected": -2.4178128242492676, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.6138328530259365, |
|
"grad_norm": 23.538795309630903, |
|
"learning_rate": 2.6058200095628797e-08, |
|
"logits/chosen": -1.9968335628509521, |
|
"logits/rejected": -2.000123977661133, |
|
"logps/chosen": -0.9178045392036438, |
|
"logps/rejected": -1.086455225944519, |
|
"loss": 1.1446, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.8356090784072876, |
|
"rewards/margins": 0.3373013734817505, |
|
"rewards/rejected": -2.172910451889038, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.6210374639769451, |
|
"grad_norm": 18.7834477811749, |
|
"learning_rate": 2.584874601622854e-08, |
|
"logits/chosen": -2.0577359199523926, |
|
"logits/rejected": -2.0486464500427246, |
|
"logps/chosen": -1.0842779874801636, |
|
"logps/rejected": -1.2169630527496338, |
|
"loss": 1.2055, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.168555974960327, |
|
"rewards/margins": 0.26537027955055237, |
|
"rewards/rejected": -2.4339261054992676, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.6282420749279538, |
|
"grad_norm": 21.50129735883824, |
|
"learning_rate": 2.5639232273487993e-08, |
|
"logits/chosen": -1.9792057275772095, |
|
"logits/rejected": -1.9694305658340454, |
|
"logps/chosen": -0.9786102175712585, |
|
"logps/rejected": -1.0999042987823486, |
|
"loss": 1.2022, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.957220435142517, |
|
"rewards/margins": 0.24258823692798615, |
|
"rewards/rejected": -2.1998085975646973, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.6354466858789625, |
|
"grad_norm": 20.836699972853967, |
|
"learning_rate": 2.5429673595358142e-08, |
|
"logits/chosen": -2.0185582637786865, |
|
"logits/rejected": -2.0170459747314453, |
|
"logps/chosen": -1.043128490447998, |
|
"logps/rejected": -1.165533185005188, |
|
"loss": 1.1967, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.086256980895996, |
|
"rewards/margins": 0.24480919539928436, |
|
"rewards/rejected": -2.331066370010376, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.6426512968299711, |
|
"grad_norm": 23.73080611195804, |
|
"learning_rate": 2.5220084712948764e-08, |
|
"logits/chosen": -1.9826107025146484, |
|
"logits/rejected": -1.9717817306518555, |
|
"logps/chosen": -1.1198116540908813, |
|
"logps/rejected": -1.2381196022033691, |
|
"loss": 1.1906, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.2396233081817627, |
|
"rewards/margins": 0.23661574721336365, |
|
"rewards/rejected": -2.4762392044067383, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.6498559077809798, |
|
"grad_norm": 19.691578624312058, |
|
"learning_rate": 2.5010480359492838e-08, |
|
"logits/chosen": -1.9650003910064697, |
|
"logits/rejected": -1.9621715545654297, |
|
"logps/chosen": -1.0505197048187256, |
|
"logps/rejected": -1.1106680631637573, |
|
"loss": 1.2861, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.101039409637451, |
|
"rewards/margins": 0.12029679119586945, |
|
"rewards/rejected": -2.2213361263275146, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.6570605187319885, |
|
"grad_norm": 21.239713120458195, |
|
"learning_rate": 2.480087526931091e-08, |
|
"logits/chosen": -2.0088658332824707, |
|
"logits/rejected": -1.9966083765029907, |
|
"logps/chosen": -1.0031955242156982, |
|
"logps/rejected": -1.1170064210891724, |
|
"loss": 1.2166, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0063910484313965, |
|
"rewards/margins": 0.22762183845043182, |
|
"rewards/rejected": -2.2340128421783447, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.6642651296829971, |
|
"grad_norm": 19.4121166793283, |
|
"learning_rate": 2.4591284176775326e-08, |
|
"logits/chosen": -1.9742721319198608, |
|
"logits/rejected": -1.9704244136810303, |
|
"logps/chosen": -1.07572340965271, |
|
"logps/rejected": -1.159128189086914, |
|
"loss": 1.2565, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.15144681930542, |
|
"rewards/margins": 0.16680975258350372, |
|
"rewards/rejected": -2.318256378173828, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.6714697406340058, |
|
"grad_norm": 21.976079747343572, |
|
"learning_rate": 2.4381721815274443e-08, |
|
"logits/chosen": -2.0400168895721436, |
|
"logits/rejected": -2.0402297973632812, |
|
"logps/chosen": -1.019706130027771, |
|
"logps/rejected": -1.1512401103973389, |
|
"loss": 1.1928, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.039412260055542, |
|
"rewards/margins": 0.26306766271591187, |
|
"rewards/rejected": -2.3024802207946777, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.6786743515850144, |
|
"grad_norm": 19.583839102475277, |
|
"learning_rate": 2.4172202916176936e-08, |
|
"logits/chosen": -2.0487046241760254, |
|
"logits/rejected": -2.0510191917419434, |
|
"logps/chosen": -0.9676868319511414, |
|
"logps/rejected": -1.1354566812515259, |
|
"loss": 1.1567, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.9353736639022827, |
|
"rewards/margins": 0.3355395197868347, |
|
"rewards/rejected": -2.2709133625030518, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.685878962536023, |
|
"grad_norm": 19.19182662272249, |
|
"learning_rate": 2.3962742207796268e-08, |
|
"logits/chosen": -1.9858787059783936, |
|
"logits/rejected": -1.9837026596069336, |
|
"logps/chosen": -0.9570139050483704, |
|
"logps/rejected": -1.1192009449005127, |
|
"loss": 1.1603, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9140278100967407, |
|
"rewards/margins": 0.32437413930892944, |
|
"rewards/rejected": -2.2384018898010254, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.6930835734870318, |
|
"grad_norm": 22.691977894194924, |
|
"learning_rate": 2.3753354414355334e-08, |
|
"logits/chosen": -1.9461901187896729, |
|
"logits/rejected": -1.9355932474136353, |
|
"logps/chosen": -1.0649149417877197, |
|
"logps/rejected": -1.1817948818206787, |
|
"loss": 1.213, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1298298835754395, |
|
"rewards/margins": 0.2337600290775299, |
|
"rewards/rejected": -2.3635897636413574, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.7002881844380404, |
|
"grad_norm": 18.59945891396093, |
|
"learning_rate": 2.3544054254951408e-08, |
|
"logits/chosen": -1.9878515005111694, |
|
"logits/rejected": -1.9792087078094482, |
|
"logps/chosen": -0.9375497102737427, |
|
"logps/rejected": -1.1345303058624268, |
|
"loss": 1.114, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.8750994205474854, |
|
"rewards/margins": 0.393961638212204, |
|
"rewards/rejected": -2.2690606117248535, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.707492795389049, |
|
"grad_norm": 18.203541895462912, |
|
"learning_rate": 2.3334856442521435e-08, |
|
"logits/chosen": -2.0370235443115234, |
|
"logits/rejected": -2.0295424461364746, |
|
"logps/chosen": -1.0964655876159668, |
|
"logps/rejected": -1.166515588760376, |
|
"loss": 1.27, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.1929311752319336, |
|
"rewards/margins": 0.1401001363992691, |
|
"rewards/rejected": -2.333031177520752, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.7146974063400577, |
|
"grad_norm": 19.123513495613718, |
|
"learning_rate": 2.3125775682807826e-08, |
|
"logits/chosen": -2.0507147312164307, |
|
"logits/rejected": -2.0506680011749268, |
|
"logps/chosen": -1.1658060550689697, |
|
"logps/rejected": -1.2665237188339233, |
|
"loss": 1.2309, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.3316121101379395, |
|
"rewards/margins": 0.2014356553554535, |
|
"rewards/rejected": -2.5330474376678467, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.7219020172910664, |
|
"grad_norm": 20.583955091856193, |
|
"learning_rate": 2.291682667332464e-08, |
|
"logits/chosen": -2.0658912658691406, |
|
"logits/rejected": -2.0607848167419434, |
|
"logps/chosen": -1.0484416484832764, |
|
"logps/rejected": -1.1794416904449463, |
|
"loss": 1.1918, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.0968832969665527, |
|
"rewards/margins": 0.2620001435279846, |
|
"rewards/rejected": -2.3588833808898926, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.729106628242075, |
|
"grad_norm": 15.255925002553854, |
|
"learning_rate": 2.2708024102324454e-08, |
|
"logits/chosen": -2.0251784324645996, |
|
"logits/rejected": -2.0195064544677734, |
|
"logps/chosen": -1.0335527658462524, |
|
"logps/rejected": -1.2097657918930054, |
|
"loss": 1.1498, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.067105531692505, |
|
"rewards/margins": 0.35242635011672974, |
|
"rewards/rejected": -2.4195315837860107, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.7363112391930837, |
|
"grad_norm": 22.44593573299748, |
|
"learning_rate": 2.2499382647765797e-08, |
|
"logits/chosen": -2.0198001861572266, |
|
"logits/rejected": -2.016092538833618, |
|
"logps/chosen": -1.0722882747650146, |
|
"logps/rejected": -1.161583662033081, |
|
"loss": 1.2463, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1445765495300293, |
|
"rewards/margins": 0.17859075963497162, |
|
"rewards/rejected": -2.323167324066162, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.7435158501440924, |
|
"grad_norm": 20.996477598226324, |
|
"learning_rate": 2.2290916976281427e-08, |
|
"logits/chosen": -1.997984528541565, |
|
"logits/rejected": -1.991624116897583, |
|
"logps/chosen": -0.9992947578430176, |
|
"logps/rejected": -1.1312335729599, |
|
"loss": 1.2149, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.9985895156860352, |
|
"rewards/margins": 0.26387742161750793, |
|
"rewards/rejected": -2.2624671459198, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.7507204610951008, |
|
"grad_norm": 18.145146158512926, |
|
"learning_rate": 2.2082641742147238e-08, |
|
"logits/chosen": -1.9863611459732056, |
|
"logits/rejected": -1.9797251224517822, |
|
"logps/chosen": -1.0165367126464844, |
|
"logps/rejected": -1.2077693939208984, |
|
"loss": 1.115, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0330734252929688, |
|
"rewards/margins": 0.38246554136276245, |
|
"rewards/rejected": -2.415538787841797, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.7579250720461095, |
|
"grad_norm": 20.511354788346416, |
|
"learning_rate": 2.1874571586252177e-08, |
|
"logits/chosen": -2.0291788578033447, |
|
"logits/rejected": -2.0222790241241455, |
|
"logps/chosen": -1.0278832912445068, |
|
"logps/rejected": -1.1068981885910034, |
|
"loss": 1.256, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0557665824890137, |
|
"rewards/margins": 0.15802964568138123, |
|
"rewards/rejected": -2.213796377182007, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.7651296829971181, |
|
"grad_norm": 20.78736849578736, |
|
"learning_rate": 2.1666721135069037e-08, |
|
"logits/chosen": -2.022594928741455, |
|
"logits/rejected": -2.019284725189209, |
|
"logps/chosen": -1.1104170083999634, |
|
"logps/rejected": -1.2043354511260986, |
|
"loss": 1.2436, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.2208340167999268, |
|
"rewards/margins": 0.18783698976039886, |
|
"rewards/rejected": -2.4086709022521973, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.7723342939481268, |
|
"grad_norm": 15.559026450288725, |
|
"learning_rate": 2.145910499962628e-08, |
|
"logits/chosen": -2.0644400119781494, |
|
"logits/rejected": -2.0565133094787598, |
|
"logps/chosen": -0.9585247039794922, |
|
"logps/rejected": -1.101301908493042, |
|
"loss": 1.1824, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9170494079589844, |
|
"rewards/margins": 0.28555426001548767, |
|
"rewards/rejected": -2.202603816986084, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.7795389048991355, |
|
"grad_norm": 23.88258329458798, |
|
"learning_rate": 2.1251737774480915e-08, |
|
"logits/chosen": -2.0456204414367676, |
|
"logits/rejected": -2.036010265350342, |
|
"logps/chosen": -1.1689999103546143, |
|
"logps/rejected": -1.2592300176620483, |
|
"loss": 1.2704, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.3379998207092285, |
|
"rewards/margins": 0.18046024441719055, |
|
"rewards/rejected": -2.5184600353240967, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.7867435158501441, |
|
"grad_norm": 17.478397647824718, |
|
"learning_rate": 2.104463403669264e-08, |
|
"logits/chosen": -1.9978790283203125, |
|
"logits/rejected": -1.9951884746551514, |
|
"logps/chosen": -1.0451444387435913, |
|
"logps/rejected": -1.189968466758728, |
|
"loss": 1.1806, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0902888774871826, |
|
"rewards/margins": 0.2896478772163391, |
|
"rewards/rejected": -2.379936933517456, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.7939481268011528, |
|
"grad_norm": 17.10039588248249, |
|
"learning_rate": 2.0837808344799028e-08, |
|
"logits/chosen": -1.9799407720565796, |
|
"logits/rejected": -1.9754537343978882, |
|
"logps/chosen": -0.9404538869857788, |
|
"logps/rejected": -1.0731335878372192, |
|
"loss": 1.1826, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.8809077739715576, |
|
"rewards/margins": 0.26535919308662415, |
|
"rewards/rejected": -2.1462671756744385, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.8011527377521612, |
|
"grad_norm": 18.18583469521082, |
|
"learning_rate": 2.063127523779219e-08, |
|
"logits/chosen": -1.9833685159683228, |
|
"logits/rejected": -1.9792015552520752, |
|
"logps/chosen": -1.0076165199279785, |
|
"logps/rejected": -1.1942096948623657, |
|
"loss": 1.1139, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.015233039855957, |
|
"rewards/margins": 0.373186320066452, |
|
"rewards/rejected": -2.3884193897247314, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.8083573487031699, |
|
"grad_norm": 19.97417842705391, |
|
"learning_rate": 2.0425049234096737e-08, |
|
"logits/chosen": -1.9911282062530518, |
|
"logits/rejected": -1.9853017330169678, |
|
"logps/chosen": -1.0088120698928833, |
|
"logps/rejected": -1.1258007287979126, |
|
"loss": 1.2158, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0176241397857666, |
|
"rewards/margins": 0.2339775562286377, |
|
"rewards/rejected": -2.251601457595825, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.8155619596541785, |
|
"grad_norm": 19.435490123277745, |
|
"learning_rate": 2.0219144830549163e-08, |
|
"logits/chosen": -1.9644883871078491, |
|
"logits/rejected": -1.9634536504745483, |
|
"logps/chosen": -1.0153688192367554, |
|
"logps/rejected": -1.161481261253357, |
|
"loss": 1.1831, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0307376384735107, |
|
"rewards/margins": 0.2922249436378479, |
|
"rewards/rejected": -2.322962522506714, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.8227665706051872, |
|
"grad_norm": 19.09312194813426, |
|
"learning_rate": 2.0013576501378823e-08, |
|
"logits/chosen": -1.9830167293548584, |
|
"logits/rejected": -1.9765300750732422, |
|
"logps/chosen": -1.0100529193878174, |
|
"logps/rejected": -1.144884705543518, |
|
"loss": 1.1939, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0201058387756348, |
|
"rewards/margins": 0.2696635127067566, |
|
"rewards/rejected": -2.289769411087036, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.8299711815561959, |
|
"grad_norm": 20.224925594213033, |
|
"learning_rate": 1.9808358697190426e-08, |
|
"logits/chosen": -1.9704053401947021, |
|
"logits/rejected": -1.966780662536621, |
|
"logps/chosen": -0.9303935766220093, |
|
"logps/rejected": -1.0650821924209595, |
|
"loss": 1.1986, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.8607871532440186, |
|
"rewards/margins": 0.26937711238861084, |
|
"rewards/rejected": -2.130164384841919, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.8371757925072045, |
|
"grad_norm": 21.09688980967129, |
|
"learning_rate": 1.9603505843948214e-08, |
|
"logits/chosen": -2.01230525970459, |
|
"logits/rejected": -2.002260208129883, |
|
"logps/chosen": -0.948139488697052, |
|
"logps/rejected": -1.1198240518569946, |
|
"loss": 1.1395, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.896278977394104, |
|
"rewards/margins": 0.3433689475059509, |
|
"rewards/rejected": -2.2396481037139893, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.8443804034582132, |
|
"grad_norm": 20.232449119924333, |
|
"learning_rate": 1.9399032341961886e-08, |
|
"logits/chosen": -1.9766803979873657, |
|
"logits/rejected": -1.960636854171753, |
|
"logps/chosen": -0.9899090528488159, |
|
"logps/rejected": -1.0627111196517944, |
|
"loss": 1.2699, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -1.9798181056976318, |
|
"rewards/margins": 0.1456039845943451, |
|
"rewards/rejected": -2.125422239303589, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.8515850144092219, |
|
"grad_norm": 26.201248917968616, |
|
"learning_rate": 1.9194952564874323e-08, |
|
"logits/chosen": -2.0239641666412354, |
|
"logits/rejected": -2.0179200172424316, |
|
"logps/chosen": -1.0649644136428833, |
|
"logps/rejected": -1.2079579830169678, |
|
"loss": 1.1683, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.1299288272857666, |
|
"rewards/margins": 0.2859875559806824, |
|
"rewards/rejected": -2.4159159660339355, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.8587896253602305, |
|
"grad_norm": 20.644198497609576, |
|
"learning_rate": 1.8991280858651157e-08, |
|
"logits/chosen": -1.9820836782455444, |
|
"logits/rejected": -1.9764404296875, |
|
"logps/chosen": -1.0636051893234253, |
|
"logps/rejected": -1.14960777759552, |
|
"loss": 1.2503, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.1272103786468506, |
|
"rewards/margins": 0.1720050871372223, |
|
"rewards/rejected": -2.29921555519104, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.8659942363112392, |
|
"grad_norm": 16.854142688708556, |
|
"learning_rate": 1.8788031540572327e-08, |
|
"logits/chosen": -1.9858763217926025, |
|
"logits/rejected": -1.977818489074707, |
|
"logps/chosen": -0.9995776414871216, |
|
"logps/rejected": -1.1453144550323486, |
|
"loss": 1.1718, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.9991552829742432, |
|
"rewards/margins": 0.29147323966026306, |
|
"rewards/rejected": -2.2906289100646973, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.8731988472622478, |
|
"grad_norm": 16.996398857656907, |
|
"learning_rate": 1.858521889822565e-08, |
|
"logits/chosen": -2.0046029090881348, |
|
"logits/rejected": -2.007223129272461, |
|
"logps/chosen": -0.9735875129699707, |
|
"logps/rejected": -1.0832773447036743, |
|
"loss": 1.2233, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.9471750259399414, |
|
"rewards/margins": 0.21937978267669678, |
|
"rewards/rejected": -2.1665546894073486, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.8804034582132565, |
|
"grad_norm": 16.42143731996496, |
|
"learning_rate": 1.8382857188502422e-08, |
|
"logits/chosen": -1.9885772466659546, |
|
"logits/rejected": -1.9836734533309937, |
|
"logps/chosen": -0.9854310750961304, |
|
"logps/rejected": -1.1128942966461182, |
|
"loss": 1.1824, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9708621501922607, |
|
"rewards/margins": 0.25492629408836365, |
|
"rewards/rejected": -2.2257885932922363, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.8876080691642652, |
|
"grad_norm": 22.507165910966208, |
|
"learning_rate": 1.8180960636595234e-08, |
|
"logits/chosen": -1.9683783054351807, |
|
"logits/rejected": -1.966205358505249, |
|
"logps/chosen": -1.0359306335449219, |
|
"logps/rejected": -1.1797659397125244, |
|
"loss": 1.1791, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0718612670898438, |
|
"rewards/margins": 0.28767016530036926, |
|
"rewards/rejected": -2.359531879425049, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.8948126801152738, |
|
"grad_norm": 20.53168247865903, |
|
"learning_rate": 1.7979543434998015e-08, |
|
"logits/chosen": -2.038526773452759, |
|
"logits/rejected": -2.0433452129364014, |
|
"logps/chosen": -1.1234701871871948, |
|
"logps/rejected": -1.2116920948028564, |
|
"loss": 1.2381, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.2469403743743896, |
|
"rewards/margins": 0.17644372582435608, |
|
"rewards/rejected": -2.423384189605713, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.9020172910662825, |
|
"grad_norm": 26.15291556775582, |
|
"learning_rate": 1.7778619742508345e-08, |
|
"logits/chosen": -1.9968883991241455, |
|
"logits/rejected": -1.9899314641952515, |
|
"logps/chosen": -1.0930571556091309, |
|
"logps/rejected": -1.1869739294052124, |
|
"loss": 1.2543, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1861143112182617, |
|
"rewards/margins": 0.18783339858055115, |
|
"rewards/rejected": -2.373947858810425, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.9092219020172911, |
|
"grad_norm": 23.18245485008842, |
|
"learning_rate": 1.757820368323213e-08, |
|
"logits/chosen": -1.9929345846176147, |
|
"logits/rejected": -1.9831962585449219, |
|
"logps/chosen": -1.1062356233596802, |
|
"logps/rejected": -1.2650859355926514, |
|
"loss": 1.161, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.2124712467193604, |
|
"rewards/margins": 0.3177003860473633, |
|
"rewards/rejected": -2.5301718711853027, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.9164265129682998, |
|
"grad_norm": 22.34671676050883, |
|
"learning_rate": 1.7378309345590803e-08, |
|
"logits/chosen": -2.006321907043457, |
|
"logits/rejected": -2.015603542327881, |
|
"logps/chosen": -1.0863068103790283, |
|
"logps/rejected": -1.2286168336868286, |
|
"loss": 1.1821, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1726136207580566, |
|
"rewards/margins": 0.2846204340457916, |
|
"rewards/rejected": -2.4572336673736572, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.9236311239193085, |
|
"grad_norm": 20.09934555506027, |
|
"learning_rate": 1.717895078133088e-08, |
|
"logits/chosen": -2.059466600418091, |
|
"logits/rejected": -2.0556395053863525, |
|
"logps/chosen": -1.0593311786651611, |
|
"logps/rejected": -1.2005198001861572, |
|
"loss": 1.1828, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1186623573303223, |
|
"rewards/margins": 0.28237712383270264, |
|
"rewards/rejected": -2.4010396003723145, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.9308357348703171, |
|
"grad_norm": 21.005378635461394, |
|
"learning_rate": 1.698014200453624e-08, |
|
"logits/chosen": -2.0109024047851562, |
|
"logits/rejected": -2.0184760093688965, |
|
"logps/chosen": -1.031286597251892, |
|
"logps/rejected": -1.1622233390808105, |
|
"loss": 1.1776, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.062573194503784, |
|
"rewards/margins": 0.26187336444854736, |
|
"rewards/rejected": -2.324446678161621, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.9380403458213258, |
|
"grad_norm": 24.456240122864646, |
|
"learning_rate": 1.6781896990642964e-08, |
|
"logits/chosen": -1.9447215795516968, |
|
"logits/rejected": -1.942016839981079, |
|
"logps/chosen": -1.1477292776107788, |
|
"logps/rejected": -1.2380485534667969, |
|
"loss": 1.2441, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.2954585552215576, |
|
"rewards/margins": 0.18063834309577942, |
|
"rewards/rejected": -2.4760971069335938, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.9452449567723344, |
|
"grad_norm": 24.379018095612878, |
|
"learning_rate": 1.658422967545693e-08, |
|
"logits/chosen": -2.0516204833984375, |
|
"logits/rejected": -2.0385327339172363, |
|
"logps/chosen": -1.0048881769180298, |
|
"logps/rejected": -1.1195095777511597, |
|
"loss": 1.2189, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.0097763538360596, |
|
"rewards/margins": 0.2292429655790329, |
|
"rewards/rejected": -2.2390191555023193, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.952449567723343, |
|
"grad_norm": 20.606423235238847, |
|
"learning_rate": 1.638715395417418e-08, |
|
"logits/chosen": -2.0263454914093018, |
|
"logits/rejected": -2.024291515350342, |
|
"logps/chosen": -1.069252610206604, |
|
"logps/rejected": -1.2053518295288086, |
|
"loss": 1.1848, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.138505220413208, |
|
"rewards/margins": 0.27219831943511963, |
|
"rewards/rejected": -2.410703659057617, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.9596541786743515, |
|
"grad_norm": 22.535979632799137, |
|
"learning_rate": 1.619068368040416e-08, |
|
"logits/chosen": -2.024005174636841, |
|
"logits/rejected": -2.0195693969726562, |
|
"logps/chosen": -1.0005989074707031, |
|
"logps/rejected": -1.178637981414795, |
|
"loss": 1.1296, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0011978149414062, |
|
"rewards/margins": 0.356078177690506, |
|
"rewards/rejected": -2.35727596282959, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.9668587896253602, |
|
"grad_norm": 17.493969053743083, |
|
"learning_rate": 1.5994832665195853e-08, |
|
"logits/chosen": -1.9611831903457642, |
|
"logits/rejected": -1.9615755081176758, |
|
"logps/chosen": -1.0340797901153564, |
|
"logps/rejected": -1.146831750869751, |
|
"loss": 1.2119, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.068159580230713, |
|
"rewards/margins": 0.22550389170646667, |
|
"rewards/rejected": -2.293663501739502, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.9740634005763689, |
|
"grad_norm": 20.261024993446156, |
|
"learning_rate": 1.5799614676066906e-08, |
|
"logits/chosen": -2.068851947784424, |
|
"logits/rejected": -2.065795421600342, |
|
"logps/chosen": -0.9484384655952454, |
|
"logps/rejected": -1.0868208408355713, |
|
"loss": 1.1744, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8968769311904907, |
|
"rewards/margins": 0.2767646610736847, |
|
"rewards/rejected": -2.1736416816711426, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.9812680115273775, |
|
"grad_norm": 16.03971358941223, |
|
"learning_rate": 1.560504343603587e-08, |
|
"logits/chosen": -1.9830427169799805, |
|
"logits/rejected": -1.983306884765625, |
|
"logps/chosen": -1.0689435005187988, |
|
"logps/rejected": -1.224401593208313, |
|
"loss": 1.1606, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.1378870010375977, |
|
"rewards/margins": 0.3109160363674164, |
|
"rewards/rejected": -2.448803186416626, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.9884726224783862, |
|
"grad_norm": 18.729955235435014, |
|
"learning_rate": 1.541113262265748e-08, |
|
"logits/chosen": -2.0666756629943848, |
|
"logits/rejected": -2.0645081996917725, |
|
"logps/chosen": -1.0288206338882446, |
|
"logps/rejected": -1.1466666460037231, |
|
"loss": 1.2071, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0576412677764893, |
|
"rewards/margins": 0.23569221794605255, |
|
"rewards/rejected": -2.2933332920074463, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.9956772334293948, |
|
"grad_norm": 25.946584240501473, |
|
"learning_rate": 1.5217895867061227e-08, |
|
"logits/chosen": -2.00740385055542, |
|
"logits/rejected": -2.0015203952789307, |
|
"logps/chosen": -1.0842344760894775, |
|
"logps/rejected": -1.1836035251617432, |
|
"loss": 1.2464, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.168468952178955, |
|
"rewards/margins": 0.19873787462711334, |
|
"rewards/rejected": -2.3672070503234863, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.0028818443804033, |
|
"grad_norm": 22.724338628633177, |
|
"learning_rate": 1.5025346752993098e-08, |
|
"logits/chosen": -1.9985382556915283, |
|
"logits/rejected": -2.000462532043457, |
|
"logps/chosen": -1.071683406829834, |
|
"logps/rejected": -1.1988445520401, |
|
"loss": 1.1999, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.143366813659668, |
|
"rewards/margins": 0.25432220101356506, |
|
"rewards/rejected": -2.3976891040802, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.010086455331412, |
|
"grad_norm": 23.240965924702092, |
|
"learning_rate": 1.4833498815860756e-08, |
|
"logits/chosen": -2.053358554840088, |
|
"logits/rejected": -2.055558443069458, |
|
"logps/chosen": -0.9996848106384277, |
|
"logps/rejected": -1.1848304271697998, |
|
"loss": 1.149, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9993696212768555, |
|
"rewards/margins": 0.3702912926673889, |
|
"rewards/rejected": -2.3696608543395996, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.0172910662824206, |
|
"grad_norm": 18.225560415881105, |
|
"learning_rate": 1.4642365541781993e-08, |
|
"logits/chosen": -1.9646400213241577, |
|
"logits/rejected": -1.9561887979507446, |
|
"logps/chosen": -1.0267730951309204, |
|
"logps/rejected": -1.1928188800811768, |
|
"loss": 1.1511, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.053546190261841, |
|
"rewards/margins": 0.3320915699005127, |
|
"rewards/rejected": -2.3856377601623535, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.0244956772334293, |
|
"grad_norm": 17.931282922261985, |
|
"learning_rate": 1.4451960366636745e-08, |
|
"logits/chosen": -2.026698589324951, |
|
"logits/rejected": -2.0378384590148926, |
|
"logps/chosen": -1.0406183004379272, |
|
"logps/rejected": -1.1752769947052002, |
|
"loss": 1.1819, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0812366008758545, |
|
"rewards/margins": 0.26931747794151306, |
|
"rewards/rejected": -2.3505539894104004, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.031700288184438, |
|
"grad_norm": 19.290431128690432, |
|
"learning_rate": 1.4262296675122592e-08, |
|
"logits/chosen": -2.0173158645629883, |
|
"logits/rejected": -2.0136220455169678, |
|
"logps/chosen": -1.030659556388855, |
|
"logps/rejected": -1.1910489797592163, |
|
"loss": 1.1523, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.06131911277771, |
|
"rewards/margins": 0.32077842950820923, |
|
"rewards/rejected": -2.3820979595184326, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.0389048991354466, |
|
"grad_norm": 17.902852888321604, |
|
"learning_rate": 1.407338779981389e-08, |
|
"logits/chosen": -1.9934546947479248, |
|
"logits/rejected": -1.9913368225097656, |
|
"logps/chosen": -0.9143481254577637, |
|
"logps/rejected": -1.0957781076431274, |
|
"loss": 1.1116, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8286962509155273, |
|
"rewards/margins": 0.36285993456840515, |
|
"rewards/rejected": -2.191556215286255, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.0461095100864553, |
|
"grad_norm": 21.306516095869544, |
|
"learning_rate": 1.3885247020224534e-08, |
|
"logits/chosen": -2.0094637870788574, |
|
"logits/rejected": -2.004822015762329, |
|
"logps/chosen": -1.0016566514968872, |
|
"logps/rejected": -1.1311957836151123, |
|
"loss": 1.1913, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0033133029937744, |
|
"rewards/margins": 0.25907841324806213, |
|
"rewards/rejected": -2.2623915672302246, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.053314121037464, |
|
"grad_norm": 17.105370578566056, |
|
"learning_rate": 1.369788756187445e-08, |
|
"logits/chosen": -2.008868455886841, |
|
"logits/rejected": -2.0054876804351807, |
|
"logps/chosen": -1.0270612239837646, |
|
"logps/rejected": -1.1226084232330322, |
|
"loss": 1.2343, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.0541224479675293, |
|
"rewards/margins": 0.19109439849853516, |
|
"rewards/rejected": -2.2452168464660645, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.0605187319884726, |
|
"grad_norm": 18.695157813530198, |
|
"learning_rate": 1.3511322595359925e-08, |
|
"logits/chosen": -2.033163547515869, |
|
"logits/rejected": -2.0247857570648193, |
|
"logps/chosen": -0.9382683634757996, |
|
"logps/rejected": -1.1057795286178589, |
|
"loss": 1.14, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8765367269515991, |
|
"rewards/margins": 0.3350227475166321, |
|
"rewards/rejected": -2.2115590572357178, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.0677233429394812, |
|
"grad_norm": 17.33211536858926, |
|
"learning_rate": 1.3325565235427716e-08, |
|
"logits/chosen": -2.028552770614624, |
|
"logits/rejected": -2.0268807411193848, |
|
"logps/chosen": -0.9831819534301758, |
|
"logps/rejected": -1.1274645328521729, |
|
"loss": 1.177, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9663639068603516, |
|
"rewards/margins": 0.288565069437027, |
|
"rewards/rejected": -2.2549290657043457, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.07492795389049, |
|
"grad_norm": 17.173051243263835, |
|
"learning_rate": 1.3140628540053218e-08, |
|
"logits/chosen": -1.9946448802947998, |
|
"logits/rejected": -1.9966709613800049, |
|
"logps/chosen": -0.9759955406188965, |
|
"logps/rejected": -1.1114940643310547, |
|
"loss": 1.1833, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.951991081237793, |
|
"rewards/margins": 0.2709970772266388, |
|
"rewards/rejected": -2.2229881286621094, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.0821325648414986, |
|
"grad_norm": 19.25779046293631, |
|
"learning_rate": 1.2956525509522451e-08, |
|
"logits/chosen": -1.9791204929351807, |
|
"logits/rejected": -1.97879159450531, |
|
"logps/chosen": -1.1120542287826538, |
|
"logps/rejected": -1.2156860828399658, |
|
"loss": 1.234, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.2241084575653076, |
|
"rewards/margins": 0.20726370811462402, |
|
"rewards/rejected": -2.4313721656799316, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 2.089337175792507, |
|
"grad_norm": 19.696970893217582, |
|
"learning_rate": 1.2773269085518267e-08, |
|
"logits/chosen": -2.011164426803589, |
|
"logits/rejected": -2.0127670764923096, |
|
"logps/chosen": -1.0766938924789429, |
|
"logps/rejected": -1.2073553800582886, |
|
"loss": 1.1837, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.1533877849578857, |
|
"rewards/margins": 0.2613227963447571, |
|
"rewards/rejected": -2.414710760116577, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.096541786743516, |
|
"grad_norm": 20.176935063380885, |
|
"learning_rate": 1.2590872150210574e-08, |
|
"logits/chosen": -2.0675017833709717, |
|
"logits/rejected": -2.0605273246765137, |
|
"logps/chosen": -1.057830810546875, |
|
"logps/rejected": -1.1687963008880615, |
|
"loss": 1.2247, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.11566162109375, |
|
"rewards/margins": 0.22193074226379395, |
|
"rewards/rejected": -2.337592601776123, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 2.1037463976945245, |
|
"grad_norm": 20.572020361191345, |
|
"learning_rate": 1.2409347525350775e-08, |
|
"logits/chosen": -2.0273613929748535, |
|
"logits/rejected": -2.0172836780548096, |
|
"logps/chosen": -1.1093732118606567, |
|
"logps/rejected": -1.2572507858276367, |
|
"loss": 1.166, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.2187464237213135, |
|
"rewards/margins": 0.2957550585269928, |
|
"rewards/rejected": -2.5145015716552734, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 2.110951008645533, |
|
"grad_norm": 22.752346590850024, |
|
"learning_rate": 1.2228707971370421e-08, |
|
"logits/chosen": -2.018433094024658, |
|
"logits/rejected": -2.01145601272583, |
|
"logps/chosen": -0.9928643107414246, |
|
"logps/rejected": -1.1063158512115479, |
|
"loss": 1.2256, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9857286214828491, |
|
"rewards/margins": 0.22690317034721375, |
|
"rewards/rejected": -2.2126317024230957, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 2.118155619596542, |
|
"grad_norm": 21.233577131341413, |
|
"learning_rate": 1.2048966186484282e-08, |
|
"logits/chosen": -2.015031337738037, |
|
"logits/rejected": -1.9982612133026123, |
|
"logps/chosen": -1.117865800857544, |
|
"logps/rejected": -1.232062578201294, |
|
"loss": 1.2127, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.235731601715088, |
|
"rewards/margins": 0.2283933460712433, |
|
"rewards/rejected": -2.464125156402588, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 2.1253602305475505, |
|
"grad_norm": 28.100094599633593, |
|
"learning_rate": 1.187013480579762e-08, |
|
"logits/chosen": -2.010659694671631, |
|
"logits/rejected": -2.0132699012756348, |
|
"logps/chosen": -1.0425684452056885, |
|
"logps/rejected": -1.1762912273406982, |
|
"loss": 1.201, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.085136890411377, |
|
"rewards/margins": 0.26744550466537476, |
|
"rewards/rejected": -2.3525824546813965, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 2.132564841498559, |
|
"grad_norm": 40.79478390193748, |
|
"learning_rate": 1.1692226400418073e-08, |
|
"logits/chosen": -1.9510002136230469, |
|
"logits/rejected": -1.9495048522949219, |
|
"logps/chosen": -1.0822184085845947, |
|
"logps/rejected": -1.212081789970398, |
|
"loss": 1.2188, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.1644368171691895, |
|
"rewards/margins": 0.2597268521785736, |
|
"rewards/rejected": -2.424163579940796, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 2.139769452449568, |
|
"grad_norm": 16.326675723252357, |
|
"learning_rate": 1.1515253476571923e-08, |
|
"logits/chosen": -1.9815658330917358, |
|
"logits/rejected": -1.975783109664917, |
|
"logps/chosen": -1.0105996131896973, |
|
"logps/rejected": -1.192287564277649, |
|
"loss": 1.1208, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0211992263793945, |
|
"rewards/margins": 0.36337584257125854, |
|
"rewards/rejected": -2.384575128555298, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 2.1469740634005765, |
|
"grad_norm": 19.921977505309442, |
|
"learning_rate": 1.133922847472496e-08, |
|
"logits/chosen": -1.9953126907348633, |
|
"logits/rejected": -1.9960988759994507, |
|
"logps/chosen": -1.110705018043518, |
|
"logps/rejected": -1.209160566329956, |
|
"loss": 1.2518, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.221410036087036, |
|
"rewards/margins": 0.19691102206707, |
|
"rewards/rejected": -2.418321132659912, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 2.154178674351585, |
|
"grad_norm": 22.89240067306987, |
|
"learning_rate": 1.1164163768707952e-08, |
|
"logits/chosen": -2.003279209136963, |
|
"logits/rejected": -1.9980405569076538, |
|
"logps/chosen": -1.0043981075286865, |
|
"logps/rejected": -1.1427768468856812, |
|
"loss": 1.1843, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.008796215057373, |
|
"rewards/margins": 0.27675721049308777, |
|
"rewards/rejected": -2.2855536937713623, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 2.161383285302594, |
|
"grad_norm": 17.89862895130365, |
|
"learning_rate": 1.0990071664846861e-08, |
|
"logits/chosen": -1.9780519008636475, |
|
"logits/rejected": -1.9771487712860107, |
|
"logps/chosen": -1.0197398662567139, |
|
"logps/rejected": -1.1996923685073853, |
|
"loss": 1.1581, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0394797325134277, |
|
"rewards/margins": 0.35990482568740845, |
|
"rewards/rejected": -2.3993847370147705, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.1685878962536025, |
|
"grad_norm": 18.90659171579793, |
|
"learning_rate": 1.0816964401097739e-08, |
|
"logits/chosen": -1.964535117149353, |
|
"logits/rejected": -1.9613316059112549, |
|
"logps/chosen": -0.9563964009284973, |
|
"logps/rejected": -1.0800330638885498, |
|
"loss": 1.2052, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.9127928018569946, |
|
"rewards/margins": 0.24727335572242737, |
|
"rewards/rejected": -2.1600661277770996, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 2.175792507204611, |
|
"grad_norm": 19.878484764331017, |
|
"learning_rate": 1.0644854146186406e-08, |
|
"logits/chosen": -2.0236928462982178, |
|
"logits/rejected": -2.0173866748809814, |
|
"logps/chosen": -1.0241403579711914, |
|
"logps/rejected": -1.1835166215896606, |
|
"loss": 1.1624, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.048280715942383, |
|
"rewards/margins": 0.318752646446228, |
|
"rewards/rejected": -2.3670332431793213, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 2.18299711815562, |
|
"grad_norm": 19.088076538610206, |
|
"learning_rate": 1.0473752998753114e-08, |
|
"logits/chosen": -2.004102945327759, |
|
"logits/rejected": -1.9954335689544678, |
|
"logps/chosen": -1.0195882320404053, |
|
"logps/rejected": -1.1798489093780518, |
|
"loss": 1.1535, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0391764640808105, |
|
"rewards/margins": 0.3205214738845825, |
|
"rewards/rejected": -2.3596978187561035, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 2.1902017291066285, |
|
"grad_norm": 19.31361091042759, |
|
"learning_rate": 1.030367298650201e-08, |
|
"logits/chosen": -2.023881196975708, |
|
"logits/rejected": -2.0238354206085205, |
|
"logps/chosen": -1.0392138957977295, |
|
"logps/rejected": -1.19191312789917, |
|
"loss": 1.1578, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.078427791595459, |
|
"rewards/margins": 0.3053986132144928, |
|
"rewards/rejected": -2.38382625579834, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 2.1974063400576367, |
|
"grad_norm": 22.093759972479646, |
|
"learning_rate": 1.0134626065355675e-08, |
|
"logits/chosen": -2.0746548175811768, |
|
"logits/rejected": -2.0715177059173584, |
|
"logps/chosen": -1.0234037637710571, |
|
"logps/rejected": -1.1665077209472656, |
|
"loss": 1.1883, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0468075275421143, |
|
"rewards/margins": 0.2862081527709961, |
|
"rewards/rejected": -2.3330154418945312, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.2046109510086453, |
|
"grad_norm": 19.64286406855496, |
|
"learning_rate": 9.966624118614611e-09, |
|
"logits/chosen": -2.006706476211548, |
|
"logits/rejected": -2.0016489028930664, |
|
"logps/chosen": -1.0631506443023682, |
|
"logps/rejected": -1.2085468769073486, |
|
"loss": 1.1876, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.1263012886047363, |
|
"rewards/margins": 0.29079198837280273, |
|
"rewards/rejected": -2.4170937538146973, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 2.211815561959654, |
|
"grad_norm": 14.856155733229528, |
|
"learning_rate": 9.799678956121976e-09, |
|
"logits/chosen": -1.9717843532562256, |
|
"logits/rejected": -1.9674240350723267, |
|
"logps/chosen": -1.0307656526565552, |
|
"logps/rejected": -1.1394712924957275, |
|
"loss": 1.2006, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0615313053131104, |
|
"rewards/margins": 0.21741144359111786, |
|
"rewards/rejected": -2.278942584991455, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 2.2190201729106627, |
|
"grad_norm": 23.633018389781732, |
|
"learning_rate": 9.633802313433314e-09, |
|
"logits/chosen": -1.9454095363616943, |
|
"logits/rejected": -1.9511306285858154, |
|
"logps/chosen": -1.0190519094467163, |
|
"logps/rejected": -1.1248835325241089, |
|
"loss": 1.2055, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0381038188934326, |
|
"rewards/margins": 0.21166305243968964, |
|
"rewards/rejected": -2.2497670650482178, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 2.2262247838616713, |
|
"grad_norm": 20.794315619142072, |
|
"learning_rate": 9.469005850991705e-09, |
|
"logits/chosen": -2.0088305473327637, |
|
"logits/rejected": -2.003154754638672, |
|
"logps/chosen": -1.0144597291946411, |
|
"logps/rejected": -1.1316652297973633, |
|
"loss": 1.2343, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.0289194583892822, |
|
"rewards/margins": 0.23441116511821747, |
|
"rewards/rejected": -2.2633304595947266, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 2.23342939481268, |
|
"grad_norm": 18.67038535819961, |
|
"learning_rate": 9.305301153307949e-09, |
|
"logits/chosen": -2.0057482719421387, |
|
"logits/rejected": -2.0133614540100098, |
|
"logps/chosen": -0.9462668299674988, |
|
"logps/rejected": -1.1108109951019287, |
|
"loss": 1.1573, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.8925336599349976, |
|
"rewards/margins": 0.3290883004665375, |
|
"rewards/rejected": -2.2216219902038574, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.2406340057636887, |
|
"grad_norm": 18.08187045245269, |
|
"learning_rate": 9.142699728146336e-09, |
|
"logits/chosen": -1.9763425588607788, |
|
"logits/rejected": -1.9695403575897217, |
|
"logps/chosen": -1.0319360494613647, |
|
"logps/rejected": -1.1644192934036255, |
|
"loss": 1.2014, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.0638720989227295, |
|
"rewards/margins": 0.26496636867523193, |
|
"rewards/rejected": -2.328838586807251, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 2.2478386167146973, |
|
"grad_norm": 16.765059853307356, |
|
"learning_rate": 8.981213005715627e-09, |
|
"logits/chosen": -2.002300500869751, |
|
"logits/rejected": -2.005335569381714, |
|
"logps/chosen": -0.9925374984741211, |
|
"logps/rejected": -1.1654067039489746, |
|
"loss": 1.1484, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.9850749969482422, |
|
"rewards/margins": 0.34573858976364136, |
|
"rewards/rejected": -2.330813407897949, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 2.255043227665706, |
|
"grad_norm": 21.826707648017194, |
|
"learning_rate": 8.820852337865611e-09, |
|
"logits/chosen": -2.0354738235473633, |
|
"logits/rejected": -2.031705617904663, |
|
"logps/chosen": -0.9956668019294739, |
|
"logps/rejected": -1.1440733671188354, |
|
"loss": 1.1717, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.9913336038589478, |
|
"rewards/margins": 0.296813428401947, |
|
"rewards/rejected": -2.288146734237671, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 2.2622478386167146, |
|
"grad_norm": 17.005533531498173, |
|
"learning_rate": 8.661628997289044e-09, |
|
"logits/chosen": -1.9752384424209595, |
|
"logits/rejected": -1.97113835811615, |
|
"logps/chosen": -1.0153406858444214, |
|
"logps/rejected": -1.1710517406463623, |
|
"loss": 1.1687, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0306813716888428, |
|
"rewards/margins": 0.3114221394062042, |
|
"rewards/rejected": -2.3421034812927246, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 2.2694524495677233, |
|
"grad_norm": 16.411029038337308, |
|
"learning_rate": 8.503554176729341e-09, |
|
"logits/chosen": -1.976362943649292, |
|
"logits/rejected": -1.974590539932251, |
|
"logps/chosen": -1.026755928993225, |
|
"logps/rejected": -1.1865880489349365, |
|
"loss": 1.1696, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.05351185798645, |
|
"rewards/margins": 0.3196641802787781, |
|
"rewards/rejected": -2.373176097869873, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.276657060518732, |
|
"grad_norm": 24.940392400474, |
|
"learning_rate": 8.346638988193636e-09, |
|
"logits/chosen": -2.0030248165130615, |
|
"logits/rejected": -1.9979517459869385, |
|
"logps/chosen": -0.9251815676689148, |
|
"logps/rejected": -1.0761079788208008, |
|
"loss": 1.1768, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.8503631353378296, |
|
"rewards/margins": 0.30185258388519287, |
|
"rewards/rejected": -2.1522159576416016, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 2.2838616714697406, |
|
"grad_norm": 23.2363909978899, |
|
"learning_rate": 8.19089446217176e-09, |
|
"logits/chosen": -1.9777719974517822, |
|
"logits/rejected": -1.9676783084869385, |
|
"logps/chosen": -1.0022261142730713, |
|
"logps/rejected": -1.1919556856155396, |
|
"loss": 1.1202, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0044522285461426, |
|
"rewards/margins": 0.3794591426849365, |
|
"rewards/rejected": -2.383911371231079, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 2.2910662824207493, |
|
"grad_norm": 17.14618373707155, |
|
"learning_rate": 8.036331546860777e-09, |
|
"logits/chosen": -1.982940435409546, |
|
"logits/rejected": -1.982465386390686, |
|
"logps/chosen": -0.9494163393974304, |
|
"logps/rejected": -1.039945125579834, |
|
"loss": 1.2442, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.8988326787948608, |
|
"rewards/margins": 0.18105748295783997, |
|
"rewards/rejected": -2.079890251159668, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 2.298270893371758, |
|
"grad_norm": 23.5198522631464, |
|
"learning_rate": 7.882961107395416e-09, |
|
"logits/chosen": -1.9984643459320068, |
|
"logits/rejected": -1.9926011562347412, |
|
"logps/chosen": -1.130748987197876, |
|
"logps/rejected": -1.177819848060608, |
|
"loss": 1.315, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.261497974395752, |
|
"rewards/margins": 0.09414196014404297, |
|
"rewards/rejected": -2.355639696121216, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 2.3054755043227666, |
|
"grad_norm": 25.590315233089598, |
|
"learning_rate": 7.73079392508428e-09, |
|
"logits/chosen": -1.9712879657745361, |
|
"logits/rejected": -1.9705880880355835, |
|
"logps/chosen": -1.0907241106033325, |
|
"logps/rejected": -1.2799861431121826, |
|
"loss": 1.1523, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.181448221206665, |
|
"rewards/margins": 0.37852445244789124, |
|
"rewards/rejected": -2.5599722862243652, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.3126801152737753, |
|
"grad_norm": 21.478168268234054, |
|
"learning_rate": 7.579840696651938e-09, |
|
"logits/chosen": -1.9963871240615845, |
|
"logits/rejected": -1.9932276010513306, |
|
"logps/chosen": -1.0474956035614014, |
|
"logps/rejected": -1.172515869140625, |
|
"loss": 1.2079, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0949912071228027, |
|
"rewards/margins": 0.2500404119491577, |
|
"rewards/rejected": -2.34503173828125, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 2.319884726224784, |
|
"grad_norm": 20.9972814315902, |
|
"learning_rate": 7.43011203348704e-09, |
|
"logits/chosen": -1.9149713516235352, |
|
"logits/rejected": -1.9116861820220947, |
|
"logps/chosen": -1.0514217615127563, |
|
"logps/rejected": -1.1269280910491943, |
|
"loss": 1.2686, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.1028435230255127, |
|
"rewards/margins": 0.1510128229856491, |
|
"rewards/rejected": -2.2538561820983887, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 2.3270893371757926, |
|
"grad_norm": 18.760848272652197, |
|
"learning_rate": 7.281618460896344e-09, |
|
"logits/chosen": -1.995486855506897, |
|
"logits/rejected": -1.9929373264312744, |
|
"logps/chosen": -0.9654563665390015, |
|
"logps/rejected": -1.1074378490447998, |
|
"loss": 1.1729, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.930912733078003, |
|
"rewards/margins": 0.2839628756046295, |
|
"rewards/rejected": -2.2148756980895996, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 2.3342939481268012, |
|
"grad_norm": 20.42845258559301, |
|
"learning_rate": 7.134370417364849e-09, |
|
"logits/chosen": -1.9683917760849, |
|
"logits/rejected": -1.9679603576660156, |
|
"logps/chosen": -1.001461386680603, |
|
"logps/rejected": -1.1403329372406006, |
|
"loss": 1.2027, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.002922773361206, |
|
"rewards/margins": 0.2777433395385742, |
|
"rewards/rejected": -2.280665874481201, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 2.34149855907781, |
|
"grad_norm": 23.164358986342677, |
|
"learning_rate": 6.988378253821981e-09, |
|
"logits/chosen": -1.9697679281234741, |
|
"logits/rejected": -1.9687258005142212, |
|
"logps/chosen": -1.0258748531341553, |
|
"logps/rejected": -1.143920660018921, |
|
"loss": 1.209, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0517497062683105, |
|
"rewards/margins": 0.23609168827533722, |
|
"rewards/rejected": -2.287841320037842, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.3487031700288186, |
|
"grad_norm": 20.30991552682094, |
|
"learning_rate": 6.8436522329140186e-09, |
|
"logits/chosen": -1.9788017272949219, |
|
"logits/rejected": -1.985569715499878, |
|
"logps/chosen": -1.0339092016220093, |
|
"logps/rejected": -1.1592271327972412, |
|
"loss": 1.2106, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.0678184032440186, |
|
"rewards/margins": 0.2506362795829773, |
|
"rewards/rejected": -2.3184542655944824, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 2.3559077809798272, |
|
"grad_norm": 21.894995604840652, |
|
"learning_rate": 6.700202528282603e-09, |
|
"logits/chosen": -1.978734016418457, |
|
"logits/rejected": -1.969061255455017, |
|
"logps/chosen": -1.0286333560943604, |
|
"logps/rejected": -1.1439108848571777, |
|
"loss": 1.2158, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0572667121887207, |
|
"rewards/margins": 0.23055517673492432, |
|
"rewards/rejected": -2.2878217697143555, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 2.363112391930836, |
|
"grad_norm": 21.434677454334327, |
|
"learning_rate": 6.558039223849668e-09, |
|
"logits/chosen": -2.0271782875061035, |
|
"logits/rejected": -2.0174622535705566, |
|
"logps/chosen": -1.0365641117095947, |
|
"logps/rejected": -1.2443287372589111, |
|
"loss": 1.1155, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0731282234191895, |
|
"rewards/margins": 0.41552942991256714, |
|
"rewards/rejected": -2.4886574745178223, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 2.3703170028818445, |
|
"grad_norm": 22.134638764373964, |
|
"learning_rate": 6.417172313108471e-09, |
|
"logits/chosen": -1.95876944065094, |
|
"logits/rejected": -1.9533073902130127, |
|
"logps/chosen": -0.9859912991523743, |
|
"logps/rejected": -1.1151400804519653, |
|
"loss": 1.1979, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9719825983047485, |
|
"rewards/margins": 0.2582974135875702, |
|
"rewards/rejected": -2.2302801609039307, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 2.377521613832853, |
|
"grad_norm": 21.658570611710445, |
|
"learning_rate": 6.277611698421179e-09, |
|
"logits/chosen": -2.017606735229492, |
|
"logits/rejected": -2.0095458030700684, |
|
"logps/chosen": -0.9050453305244446, |
|
"logps/rejected": -1.098288893699646, |
|
"loss": 1.1252, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.8100906610488892, |
|
"rewards/margins": 0.3864876627922058, |
|
"rewards/rejected": -2.196577787399292, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.3847262247838614, |
|
"grad_norm": 22.8559892529762, |
|
"learning_rate": 6.139367190322714e-09, |
|
"logits/chosen": -2.0034892559051514, |
|
"logits/rejected": -2.0032081604003906, |
|
"logps/chosen": -1.0592529773712158, |
|
"logps/rejected": -1.2185790538787842, |
|
"loss": 1.1609, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.1185059547424316, |
|
"rewards/margins": 0.3186524510383606, |
|
"rewards/rejected": -2.4371581077575684, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 2.39193083573487, |
|
"grad_norm": 17.198608533100995, |
|
"learning_rate": 6.002448506831171e-09, |
|
"logits/chosen": -2.0061838626861572, |
|
"logits/rejected": -2.0014090538024902, |
|
"logps/chosen": -0.9808699488639832, |
|
"logps/rejected": -1.1246802806854248, |
|
"loss": 1.1731, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9617398977279663, |
|
"rewards/margins": 0.2876203954219818, |
|
"rewards/rejected": -2.2493605613708496, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 2.3991354466858787, |
|
"grad_norm": 18.199025209277288, |
|
"learning_rate": 5.866865272764607e-09, |
|
"logits/chosen": -2.023648262023926, |
|
"logits/rejected": -2.0234923362731934, |
|
"logps/chosen": -1.0167878866195679, |
|
"logps/rejected": -1.1612762212753296, |
|
"loss": 1.1772, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0335757732391357, |
|
"rewards/margins": 0.28897663950920105, |
|
"rewards/rejected": -2.322552442550659, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 2.4063400576368874, |
|
"grad_norm": 23.302252487813124, |
|
"learning_rate": 5.7326270190645595e-09, |
|
"logits/chosen": -1.896691918373108, |
|
"logits/rejected": -1.8979320526123047, |
|
"logps/chosen": -1.0594362020492554, |
|
"logps/rejected": -1.1698405742645264, |
|
"loss": 1.2168, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.1188724040985107, |
|
"rewards/margins": 0.2208089381456375, |
|
"rewards/rejected": -2.3396811485290527, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 2.413544668587896, |
|
"grad_norm": 18.446092862588884, |
|
"learning_rate": 5.599743182125938e-09, |
|
"logits/chosen": -2.043023109436035, |
|
"logits/rejected": -2.043013095855713, |
|
"logps/chosen": -1.0480068922042847, |
|
"logps/rejected": -1.1850215196609497, |
|
"loss": 1.179, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.0960137844085693, |
|
"rewards/margins": 0.2740294933319092, |
|
"rewards/rejected": -2.3700430393218994, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.4207492795389047, |
|
"grad_norm": 20.220307143059344, |
|
"learning_rate": 5.46822310313379e-09, |
|
"logits/chosen": -2.0473732948303223, |
|
"logits/rejected": -2.0569522380828857, |
|
"logps/chosen": -1.09086012840271, |
|
"logps/rejected": -1.1955832242965698, |
|
"loss": 1.2353, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.18172025680542, |
|
"rewards/margins": 0.2094462662935257, |
|
"rewards/rejected": -2.3911664485931396, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 2.4279538904899134, |
|
"grad_norm": 20.780990431383444, |
|
"learning_rate": 5.33807602740658e-09, |
|
"logits/chosen": -2.022789478302002, |
|
"logits/rejected": -2.0159573554992676, |
|
"logps/chosen": -0.9560559988021851, |
|
"logps/rejected": -1.160628318786621, |
|
"loss": 1.111, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.9121119976043701, |
|
"rewards/margins": 0.40914446115493774, |
|
"rewards/rejected": -2.321256637573242, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 2.435158501440922, |
|
"grad_norm": 21.245348975655457, |
|
"learning_rate": 5.209311103746334e-09, |
|
"logits/chosen": -2.0008084774017334, |
|
"logits/rejected": -2.0011303424835205, |
|
"logps/chosen": -1.0523884296417236, |
|
"logps/rejected": -1.224974274635315, |
|
"loss": 1.1587, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.1047768592834473, |
|
"rewards/margins": 0.3451715409755707, |
|
"rewards/rejected": -2.44994854927063, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 2.4423631123919307, |
|
"grad_norm": 24.352598699910715, |
|
"learning_rate": 5.081937383795484e-09, |
|
"logits/chosen": -1.972608208656311, |
|
"logits/rejected": -1.9717302322387695, |
|
"logps/chosen": -0.9721845388412476, |
|
"logps/rejected": -1.137112021446228, |
|
"loss": 1.1484, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.9443690776824951, |
|
"rewards/margins": 0.32985490560531616, |
|
"rewards/rejected": -2.274224042892456, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 2.4495677233429394, |
|
"grad_norm": 18.574622449743107, |
|
"learning_rate": 4.955963821400599e-09, |
|
"logits/chosen": -2.0249781608581543, |
|
"logits/rejected": -2.019134759902954, |
|
"logps/chosen": -1.029394507408142, |
|
"logps/rejected": -1.167999505996704, |
|
"loss": 1.1922, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.058789014816284, |
|
"rewards/margins": 0.2772100567817688, |
|
"rewards/rejected": -2.335999011993408, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.456772334293948, |
|
"grad_norm": 15.429482416255146, |
|
"learning_rate": 4.831399271982928e-09, |
|
"logits/chosen": -1.9512850046157837, |
|
"logits/rejected": -1.9432300329208374, |
|
"logps/chosen": -1.0431114435195923, |
|
"logps/rejected": -1.1738238334655762, |
|
"loss": 1.2095, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.0862228870391846, |
|
"rewards/margins": 0.26142507791519165, |
|
"rewards/rejected": -2.3476476669311523, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 2.4639769452449567, |
|
"grad_norm": 25.103110732614255, |
|
"learning_rate": 4.708252491915951e-09, |
|
"logits/chosen": -2.030299663543701, |
|
"logits/rejected": -2.024203300476074, |
|
"logps/chosen": -1.0459200143814087, |
|
"logps/rejected": -1.1930789947509766, |
|
"loss": 1.197, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0918400287628174, |
|
"rewards/margins": 0.2943178713321686, |
|
"rewards/rejected": -2.386157989501953, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 2.4711815561959654, |
|
"grad_norm": 25.280433628761124, |
|
"learning_rate": 4.58653213790981e-09, |
|
"logits/chosen": -2.006598472595215, |
|
"logits/rejected": -1.9983857870101929, |
|
"logps/chosen": -1.025721549987793, |
|
"logps/rejected": -1.1746145486831665, |
|
"loss": 1.1792, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.051443099975586, |
|
"rewards/margins": 0.29778599739074707, |
|
"rewards/rejected": -2.349229097366333, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 2.478386167146974, |
|
"grad_norm": 18.242083284353217, |
|
"learning_rate": 4.466246766402773e-09, |
|
"logits/chosen": -1.9907543659210205, |
|
"logits/rejected": -1.9845359325408936, |
|
"logps/chosen": -1.0393613576889038, |
|
"logps/rejected": -1.1935051679611206, |
|
"loss": 1.1827, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0787227153778076, |
|
"rewards/margins": 0.30828770995140076, |
|
"rewards/rejected": -2.387010335922241, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 2.4855907780979827, |
|
"grad_norm": 22.018503196573274, |
|
"learning_rate": 4.347404832959775e-09, |
|
"logits/chosen": -2.0336432456970215, |
|
"logits/rejected": -2.0338807106018066, |
|
"logps/chosen": -1.032832384109497, |
|
"logps/rejected": -1.1933454275131226, |
|
"loss": 1.1612, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.065664768218994, |
|
"rewards/margins": 0.32102587819099426, |
|
"rewards/rejected": -2.386690855026245, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.4927953890489913, |
|
"grad_norm": 33.037428693429234, |
|
"learning_rate": 4.230014691678016e-09, |
|
"logits/chosen": -1.9883922338485718, |
|
"logits/rejected": -1.9890626668930054, |
|
"logps/chosen": -1.0595102310180664, |
|
"logps/rejected": -1.126479148864746, |
|
"loss": 1.272, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.119020462036133, |
|
"rewards/margins": 0.1339379847049713, |
|
"rewards/rejected": -2.252958297729492, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 17.82843912451702, |
|
"learning_rate": 4.114084594599707e-09, |
|
"logits/chosen": -1.9903564453125, |
|
"logits/rejected": -1.9900470972061157, |
|
"logps/chosen": -1.0114375352859497, |
|
"logps/rejected": -1.229552984237671, |
|
"loss": 1.1011, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.0228750705718994, |
|
"rewards/margins": 0.4362305998802185, |
|
"rewards/rejected": -2.459105968475342, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 2.5072046109510087, |
|
"grad_norm": 22.102059612075095, |
|
"learning_rate": 3.9996226911319546e-09, |
|
"logits/chosen": -1.993326187133789, |
|
"logits/rejected": -1.981066107749939, |
|
"logps/chosen": -1.0159366130828857, |
|
"logps/rejected": -1.1457350254058838, |
|
"loss": 1.1898, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.0318732261657715, |
|
"rewards/margins": 0.25959664583206177, |
|
"rewards/rejected": -2.2914700508117676, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 2.5144092219020173, |
|
"grad_norm": 17.973603590541654, |
|
"learning_rate": 3.886637027473949e-09, |
|
"logits/chosen": -2.0013790130615234, |
|
"logits/rejected": -2.0035085678100586, |
|
"logps/chosen": -1.076293706893921, |
|
"logps/rejected": -1.2393258810043335, |
|
"loss": 1.1562, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.152587413787842, |
|
"rewards/margins": 0.3260645270347595, |
|
"rewards/rejected": -2.478651762008667, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 2.521613832853026, |
|
"grad_norm": 19.37957776631117, |
|
"learning_rate": 3.775135546051295e-09, |
|
"logits/chosen": -1.9389715194702148, |
|
"logits/rejected": -1.9399712085723877, |
|
"logps/chosen": -1.0256609916687012, |
|
"logps/rejected": -1.1517935991287231, |
|
"loss": 1.1987, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0513219833374023, |
|
"rewards/margins": 0.252265065908432, |
|
"rewards/rejected": -2.3035871982574463, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.5288184438040346, |
|
"grad_norm": 23.33567215234884, |
|
"learning_rate": 3.665126084957723e-09, |
|
"logits/chosen": -1.984487533569336, |
|
"logits/rejected": -1.9886258840560913, |
|
"logps/chosen": -1.1339917182922363, |
|
"logps/rejected": -1.233039140701294, |
|
"loss": 1.2585, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.2679834365844727, |
|
"rewards/margins": 0.19809459149837494, |
|
"rewards/rejected": -2.466078281402588, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 2.5360230547550433, |
|
"grad_norm": 19.609640038869685, |
|
"learning_rate": 3.556616377404101e-09, |
|
"logits/chosen": -2.00850248336792, |
|
"logits/rejected": -2.006412982940674, |
|
"logps/chosen": -1.07861328125, |
|
"logps/rejected": -1.236485242843628, |
|
"loss": 1.154, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1572265625, |
|
"rewards/margins": 0.31574416160583496, |
|
"rewards/rejected": -2.472970485687256, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 2.543227665706052, |
|
"grad_norm": 19.767394228725337, |
|
"learning_rate": 3.4496140511748125e-09, |
|
"logits/chosen": -1.9994624853134155, |
|
"logits/rejected": -1.9942439794540405, |
|
"logps/chosen": -1.0551049709320068, |
|
"logps/rejected": -1.1994330883026123, |
|
"loss": 1.1752, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.1102099418640137, |
|
"rewards/margins": 0.28865596652030945, |
|
"rewards/rejected": -2.3988661766052246, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 2.5504322766570606, |
|
"grad_norm": 31.22852578343729, |
|
"learning_rate": 3.3441266280915427e-09, |
|
"logits/chosen": -1.9827390909194946, |
|
"logits/rejected": -1.983473539352417, |
|
"logps/chosen": -1.093752145767212, |
|
"logps/rejected": -1.20872163772583, |
|
"loss": 1.2131, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.187504291534424, |
|
"rewards/margins": 0.229939267039299, |
|
"rewards/rejected": -2.41744327545166, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 2.5576368876080693, |
|
"grad_norm": 23.706412362537016, |
|
"learning_rate": 3.2401615234845693e-09, |
|
"logits/chosen": -2.0068211555480957, |
|
"logits/rejected": -2.0008292198181152, |
|
"logps/chosen": -1.0923867225646973, |
|
"logps/rejected": -1.2357128858566284, |
|
"loss": 1.1896, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1847734451293945, |
|
"rewards/margins": 0.28665226697921753, |
|
"rewards/rejected": -2.471425771713257, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.564841498559078, |
|
"grad_norm": 16.13728529223842, |
|
"learning_rate": 3.1377260456714375e-09, |
|
"logits/chosen": -1.901414155960083, |
|
"logits/rejected": -1.8929615020751953, |
|
"logps/chosen": -1.0596764087677002, |
|
"logps/rejected": -1.2023025751113892, |
|
"loss": 1.1686, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.1193528175354004, |
|
"rewards/margins": 0.28525251150131226, |
|
"rewards/rejected": -2.4046051502227783, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 2.5720461095100866, |
|
"grad_norm": 18.028717215705484, |
|
"learning_rate": 3.0368273954432698e-09, |
|
"logits/chosen": -2.0307531356811523, |
|
"logits/rejected": -2.022324800491333, |
|
"logps/chosen": -1.0493528842926025, |
|
"logps/rejected": -1.1534329652786255, |
|
"loss": 1.2252, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.098705768585205, |
|
"rewards/margins": 0.20816004276275635, |
|
"rewards/rejected": -2.306865930557251, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 2.5792507204610953, |
|
"grad_norm": 17.367490930434325, |
|
"learning_rate": 2.937472665558541e-09, |
|
"logits/chosen": -2.019484281539917, |
|
"logits/rejected": -2.020643711090088, |
|
"logps/chosen": -1.036195993423462, |
|
"logps/rejected": -1.147991418838501, |
|
"loss": 1.2267, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.072391986846924, |
|
"rewards/margins": 0.22359101474285126, |
|
"rewards/rejected": -2.295982837677002, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 2.586455331412104, |
|
"grad_norm": 21.805325598847563, |
|
"learning_rate": 2.8396688402445053e-09, |
|
"logits/chosen": -2.0637335777282715, |
|
"logits/rejected": -2.0563552379608154, |
|
"logps/chosen": -1.0100147724151611, |
|
"logps/rejected": -1.2180942296981812, |
|
"loss": 1.1063, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.0200295448303223, |
|
"rewards/margins": 0.4161592125892639, |
|
"rewards/rejected": -2.4361884593963623, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 2.5936599423631126, |
|
"grad_norm": 24.439180591540023, |
|
"learning_rate": 2.7434227947062324e-09, |
|
"logits/chosen": -2.0062127113342285, |
|
"logits/rejected": -1.9998852014541626, |
|
"logps/chosen": -1.1317923069000244, |
|
"logps/rejected": -1.2398041486740112, |
|
"loss": 1.2326, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.263584613800049, |
|
"rewards/margins": 0.21602365374565125, |
|
"rewards/rejected": -2.4796082973480225, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.6008645533141213, |
|
"grad_norm": 18.265831934479376, |
|
"learning_rate": 2.6487412946432976e-09, |
|
"logits/chosen": -1.9716873168945312, |
|
"logits/rejected": -1.966560959815979, |
|
"logps/chosen": -1.0693469047546387, |
|
"logps/rejected": -1.2056225538253784, |
|
"loss": 1.1912, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.1386938095092773, |
|
"rewards/margins": 0.27255168557167053, |
|
"rewards/rejected": -2.411245107650757, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 2.60806916426513, |
|
"grad_norm": 22.910160520824302, |
|
"learning_rate": 2.5556309957742024e-09, |
|
"logits/chosen": -1.97675359249115, |
|
"logits/rejected": -1.9716304540634155, |
|
"logps/chosen": -1.0250674486160278, |
|
"logps/rejected": -1.2212371826171875, |
|
"loss": 1.1161, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.0501348972320557, |
|
"rewards/margins": 0.3923397660255432, |
|
"rewards/rejected": -2.442474365234375, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 2.6152737752161386, |
|
"grad_norm": 22.671601957903725, |
|
"learning_rate": 2.4640984433684758e-09, |
|
"logits/chosen": -2.0380005836486816, |
|
"logits/rejected": -2.0387332439422607, |
|
"logps/chosen": -1.1190853118896484, |
|
"logps/rejected": -1.235012173652649, |
|
"loss": 1.2353, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -2.238170623779297, |
|
"rewards/margins": 0.231853649020195, |
|
"rewards/rejected": -2.470024347305298, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 2.6224783861671472, |
|
"grad_norm": 17.024567886757257, |
|
"learning_rate": 2.3741500717865987e-09, |
|
"logits/chosen": -1.9916216135025024, |
|
"logits/rejected": -2.0025291442871094, |
|
"logps/chosen": -1.0068811178207397, |
|
"logps/rejected": -1.1515626907348633, |
|
"loss": 1.1789, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.0137622356414795, |
|
"rewards/margins": 0.2893627882003784, |
|
"rewards/rejected": -2.3031253814697266, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 2.629682997118156, |
|
"grad_norm": 17.494575910236158, |
|
"learning_rate": 2.285792204027678e-09, |
|
"logits/chosen": -1.9781382083892822, |
|
"logits/rejected": -1.9753141403198242, |
|
"logps/chosen": -1.013346791267395, |
|
"logps/rejected": -1.211428165435791, |
|
"loss": 1.1021, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.02669358253479, |
|
"rewards/margins": 0.39616289734840393, |
|
"rewards/rejected": -2.422856330871582, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.636887608069164, |
|
"grad_norm": 20.794166929263792, |
|
"learning_rate": 2.199031051284972e-09, |
|
"logits/chosen": -2.007935047149658, |
|
"logits/rejected": -2.0033650398254395, |
|
"logps/chosen": -1.069888710975647, |
|
"logps/rejected": -1.1960642337799072, |
|
"loss": 1.219, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.139777421951294, |
|
"rewards/margins": 0.25235068798065186, |
|
"rewards/rejected": -2.3921284675598145, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 2.6440922190201728, |
|
"grad_norm": 16.98386285768041, |
|
"learning_rate": 2.113872712509254e-09, |
|
"logits/chosen": -1.9919393062591553, |
|
"logits/rejected": -1.9845707416534424, |
|
"logps/chosen": -1.1297125816345215, |
|
"logps/rejected": -1.241287112236023, |
|
"loss": 1.2279, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.259425163269043, |
|
"rewards/margins": 0.22314925491809845, |
|
"rewards/rejected": -2.482574224472046, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 2.6512968299711814, |
|
"grad_norm": 14.064145090241722, |
|
"learning_rate": 2.0303231739801143e-09, |
|
"logits/chosen": -1.9741106033325195, |
|
"logits/rejected": -1.9633283615112305, |
|
"logps/chosen": -1.0185304880142212, |
|
"logps/rejected": -1.1587377786636353, |
|
"loss": 1.1825, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.0370609760284424, |
|
"rewards/margins": 0.28041452169418335, |
|
"rewards/rejected": -2.3174755573272705, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 2.65850144092219, |
|
"grad_norm": 23.56396327392751, |
|
"learning_rate": 1.948388308885102e-09, |
|
"logits/chosen": -2.0366296768188477, |
|
"logits/rejected": -2.02805757522583, |
|
"logps/chosen": -1.0637743473052979, |
|
"logps/rejected": -1.1742548942565918, |
|
"loss": 1.2171, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.1275486946105957, |
|
"rewards/margins": 0.22096149623394012, |
|
"rewards/rejected": -2.3485097885131836, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 2.6657060518731988, |
|
"grad_norm": 25.21273485809688, |
|
"learning_rate": 1.86807387690692e-09, |
|
"logits/chosen": -2.0631988048553467, |
|
"logits/rejected": -2.0600669384002686, |
|
"logps/chosen": -1.0889419317245483, |
|
"logps/rejected": -1.2770618200302124, |
|
"loss": 1.1162, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.1778838634490967, |
|
"rewards/margins": 0.3762398660182953, |
|
"rewards/rejected": -2.554123640060425, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.6729106628242074, |
|
"grad_norm": 19.210332180481718, |
|
"learning_rate": 1.789385523818493e-09, |
|
"logits/chosen": -2.027967929840088, |
|
"logits/rejected": -2.0294251441955566, |
|
"logps/chosen": -1.0404349565505981, |
|
"logps/rejected": -1.209099531173706, |
|
"loss": 1.149, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.0808699131011963, |
|
"rewards/margins": 0.33732882142066956, |
|
"rewards/rejected": -2.418199062347412, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 2.680115273775216, |
|
"grad_norm": 25.919412237452388, |
|
"learning_rate": 1.712328781086131e-09, |
|
"logits/chosen": -2.0483665466308594, |
|
"logits/rejected": -2.0430164337158203, |
|
"logps/chosen": -1.1230162382125854, |
|
"logps/rejected": -1.2185190916061401, |
|
"loss": 1.2401, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.246032476425171, |
|
"rewards/margins": 0.191005676984787, |
|
"rewards/rejected": -2.4370381832122803, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 2.6873198847262247, |
|
"grad_norm": 21.180605350865044, |
|
"learning_rate": 1.6369090654806543e-09, |
|
"logits/chosen": -2.0540661811828613, |
|
"logits/rejected": -2.0474164485931396, |
|
"logps/chosen": -1.0206701755523682, |
|
"logps/rejected": -1.1645678281784058, |
|
"loss": 1.1684, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.0413403511047363, |
|
"rewards/margins": 0.2877953350543976, |
|
"rewards/rejected": -2.3291356563568115, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 2.6945244956772334, |
|
"grad_norm": 19.151584962250723, |
|
"learning_rate": 1.5631316786966498e-09, |
|
"logits/chosen": -1.9853427410125732, |
|
"logits/rejected": -1.978816270828247, |
|
"logps/chosen": -1.0220520496368408, |
|
"logps/rejected": -1.1623871326446533, |
|
"loss": 1.1969, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -2.0441040992736816, |
|
"rewards/margins": 0.2806701064109802, |
|
"rewards/rejected": -2.3247742652893066, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 2.701729106628242, |
|
"grad_norm": 18.499060326329523, |
|
"learning_rate": 1.491001806979772e-09, |
|
"logits/chosen": -2.035274028778076, |
|
"logits/rejected": -2.028480052947998, |
|
"logps/chosen": -1.077332854270935, |
|
"logps/rejected": -1.2263195514678955, |
|
"loss": 1.1742, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.15466570854187, |
|
"rewards/margins": 0.29797306656837463, |
|
"rewards/rejected": -2.452639102935791, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.7089337175792507, |
|
"grad_norm": 29.16839407251503, |
|
"learning_rate": 1.4205245207621508e-09, |
|
"logits/chosen": -1.9820353984832764, |
|
"logits/rejected": -1.9796836376190186, |
|
"logps/chosen": -1.1182725429534912, |
|
"logps/rejected": -1.2864872217178345, |
|
"loss": 1.1548, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -2.2365450859069824, |
|
"rewards/margins": 0.33642950654029846, |
|
"rewards/rejected": -2.572974443435669, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 2.7161383285302594, |
|
"grad_norm": 17.613059928527296, |
|
"learning_rate": 1.3517047743059978e-09, |
|
"logits/chosen": -2.0181725025177, |
|
"logits/rejected": -2.0215516090393066, |
|
"logps/chosen": -1.0734504461288452, |
|
"logps/rejected": -1.2342610359191895, |
|
"loss": 1.1565, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1469008922576904, |
|
"rewards/margins": 0.3216209411621094, |
|
"rewards/rejected": -2.468522071838379, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 2.723342939481268, |
|
"grad_norm": 17.0753116834011, |
|
"learning_rate": 1.2845474053553156e-09, |
|
"logits/chosen": -2.0134406089782715, |
|
"logits/rejected": -2.0093090534210205, |
|
"logps/chosen": -1.0317740440368652, |
|
"logps/rejected": -1.168919324874878, |
|
"loss": 1.2025, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.0635480880737305, |
|
"rewards/margins": 0.2742905914783478, |
|
"rewards/rejected": -2.337838649749756, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 2.7305475504322767, |
|
"grad_norm": 22.583791154808193, |
|
"learning_rate": 1.2190571347958422e-09, |
|
"logits/chosen": -2.044787883758545, |
|
"logits/rejected": -2.046135187149048, |
|
"logps/chosen": -0.9667074084281921, |
|
"logps/rejected": -1.1686309576034546, |
|
"loss": 1.1099, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.9334148168563843, |
|
"rewards/margins": 0.4038470387458801, |
|
"rewards/rejected": -2.337261915206909, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 2.7377521613832854, |
|
"grad_norm": 18.287187828533536, |
|
"learning_rate": 1.1552385663231634e-09, |
|
"logits/chosen": -1.9937756061553955, |
|
"logits/rejected": -1.9841327667236328, |
|
"logps/chosen": -1.0935721397399902, |
|
"logps/rejected": -1.190500020980835, |
|
"loss": 1.2381, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.1871442794799805, |
|
"rewards/margins": 0.19385603070259094, |
|
"rewards/rejected": -2.38100004196167, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.744956772334294, |
|
"grad_norm": 18.99456309056716, |
|
"learning_rate": 1.0930961861191302e-09, |
|
"logits/chosen": -1.9580612182617188, |
|
"logits/rejected": -1.9627430438995361, |
|
"logps/chosen": -1.0382286310195923, |
|
"logps/rejected": -1.1804331541061401, |
|
"loss": 1.2008, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.0764572620391846, |
|
"rewards/margins": 0.2844088673591614, |
|
"rewards/rejected": -2.3608663082122803, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 2.7521613832853027, |
|
"grad_norm": 16.764883597440075, |
|
"learning_rate": 1.0326343625364608e-09, |
|
"logits/chosen": -1.9691221714019775, |
|
"logits/rejected": -1.9639511108398438, |
|
"logps/chosen": -1.0410795211791992, |
|
"logps/rejected": -1.2136642932891846, |
|
"loss": 1.138, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.0821590423583984, |
|
"rewards/margins": 0.3451697826385498, |
|
"rewards/rejected": -2.427328586578369, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 2.7593659942363113, |
|
"grad_norm": 18.44911571731718, |
|
"learning_rate": 9.738573457917066e-10, |
|
"logits/chosen": -2.043980836868286, |
|
"logits/rejected": -2.042267084121704, |
|
"logps/chosen": -1.0499022006988525, |
|
"logps/rejected": -1.2412595748901367, |
|
"loss": 1.11, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.099804401397705, |
|
"rewards/margins": 0.3827148973941803, |
|
"rewards/rejected": -2.4825191497802734, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 2.76657060518732, |
|
"grad_norm": 18.764417824451066, |
|
"learning_rate": 9.16769267666434e-10, |
|
"logits/chosen": -2.0091567039489746, |
|
"logits/rejected": -2.0069632530212402, |
|
"logps/chosen": -1.074094295501709, |
|
"logps/rejected": -1.1498383283615112, |
|
"loss": 1.2637, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.148188591003418, |
|
"rewards/margins": 0.15148821473121643, |
|
"rewards/rejected": -2.2996766567230225, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 2.7737752161383287, |
|
"grad_norm": 20.046312375742783, |
|
"learning_rate": 8.613741412168113e-10, |
|
"logits/chosen": -2.027498245239258, |
|
"logits/rejected": -2.026846408843994, |
|
"logps/chosen": -1.0808565616607666, |
|
"logps/rejected": -1.2099745273590088, |
|
"loss": 1.1798, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.161713123321533, |
|
"rewards/margins": 0.2582358717918396, |
|
"rewards/rejected": -2.4199490547180176, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.7809798270893373, |
|
"grad_norm": 19.84763607582755, |
|
"learning_rate": 8.076758604914802e-10, |
|
"logits/chosen": -1.957332968711853, |
|
"logits/rejected": -1.9527628421783447, |
|
"logps/chosen": -0.9819733500480652, |
|
"logps/rejected": -1.114538550376892, |
|
"loss": 1.1997, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9639467000961304, |
|
"rewards/margins": 0.2651303708553314, |
|
"rewards/rejected": -2.229077100753784, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.7881844380403455, |
|
"grad_norm": 22.904658084781477, |
|
"learning_rate": 7.55678200257856e-10, |
|
"logits/chosen": -1.9844300746917725, |
|
"logits/rejected": -1.9776723384857178, |
|
"logps/chosen": -1.0327974557876587, |
|
"logps/rejected": -1.1763405799865723, |
|
"loss": 1.1751, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -2.0655949115753174, |
|
"rewards/margins": 0.2870861887931824, |
|
"rewards/rejected": -2.3526811599731445, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.795389048991354, |
|
"grad_norm": 17.15291998943784, |
|
"learning_rate": 7.053848157367315e-10, |
|
"logits/chosen": -1.9995343685150146, |
|
"logits/rejected": -1.9940645694732666, |
|
"logps/chosen": -1.0412391424179077, |
|
"logps/rejected": -1.1907306909561157, |
|
"loss": 1.1831, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.0824782848358154, |
|
"rewards/margins": 0.29898306727409363, |
|
"rewards/rejected": -2.3814613819122314, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.802593659942363, |
|
"grad_norm": 15.812884819551362, |
|
"learning_rate": 6.567992423453794e-10, |
|
"logits/chosen": -2.0206310749053955, |
|
"logits/rejected": -2.019430637359619, |
|
"logps/chosen": -0.9630235433578491, |
|
"logps/rejected": -1.0794202089309692, |
|
"loss": 1.2021, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.9260470867156982, |
|
"rewards/margins": 0.23279304802417755, |
|
"rewards/rejected": -2.1588404178619385, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.8097982708933715, |
|
"grad_norm": 19.54993986750196, |
|
"learning_rate": 6.099248954489794e-10, |
|
"logits/chosen": -1.953507423400879, |
|
"logits/rejected": -1.9511181116104126, |
|
"logps/chosen": -1.0681465864181519, |
|
"logps/rejected": -1.229273796081543, |
|
"loss": 1.1568, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1362931728363037, |
|
"rewards/margins": 0.32225483655929565, |
|
"rewards/rejected": -2.458547592163086, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.81700288184438, |
|
"grad_norm": 22.672929732957467, |
|
"learning_rate": 5.647650701205653e-10, |
|
"logits/chosen": -2.026876449584961, |
|
"logits/rejected": -2.018667697906494, |
|
"logps/chosen": -1.1109135150909424, |
|
"logps/rejected": -1.2674014568328857, |
|
"loss": 1.1766, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.2218270301818848, |
|
"rewards/margins": 0.3129761219024658, |
|
"rewards/rejected": -2.5348029136657715, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.824207492795389, |
|
"grad_norm": 16.28695288206369, |
|
"learning_rate": 5.213229409093856e-10, |
|
"logits/chosen": -2.0310721397399902, |
|
"logits/rejected": -2.0254709720611572, |
|
"logps/chosen": -1.05387282371521, |
|
"logps/rejected": -1.1856187582015991, |
|
"loss": 1.2009, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.10774564743042, |
|
"rewards/margins": 0.263491690158844, |
|
"rewards/rejected": -2.3712375164031982, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.8314121037463975, |
|
"grad_norm": 20.975683447759703, |
|
"learning_rate": 4.796015616177401e-10, |
|
"logits/chosen": -2.0005106925964355, |
|
"logits/rejected": -1.994783639907837, |
|
"logps/chosen": -1.0665435791015625, |
|
"logps/rejected": -1.1785615682601929, |
|
"loss": 1.2143, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.133087158203125, |
|
"rewards/margins": 0.22403590381145477, |
|
"rewards/rejected": -2.3571231365203857, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.838616714697406, |
|
"grad_norm": 16.71900486734478, |
|
"learning_rate": 4.3960386508631595e-10, |
|
"logits/chosen": -1.937182068824768, |
|
"logits/rejected": -1.9297128915786743, |
|
"logps/chosen": -0.9666848182678223, |
|
"logps/rejected": -1.0865039825439453, |
|
"loss": 1.2256, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.9333696365356445, |
|
"rewards/margins": 0.23963849246501923, |
|
"rewards/rejected": -2.1730079650878906, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.845821325648415, |
|
"grad_norm": 35.92105468101964, |
|
"learning_rate": 4.013326629880243e-10, |
|
"logits/chosen": -1.9777085781097412, |
|
"logits/rejected": -1.968076467514038, |
|
"logps/chosen": -1.1062713861465454, |
|
"logps/rejected": -1.2339928150177002, |
|
"loss": 1.2044, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.212542772293091, |
|
"rewards/margins": 0.25544288754463196, |
|
"rewards/rejected": -2.4679856300354004, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.8530259365994235, |
|
"grad_norm": 19.697159928360417, |
|
"learning_rate": 3.64790645630339e-10, |
|
"logits/chosen": -1.942647933959961, |
|
"logits/rejected": -1.9421268701553345, |
|
"logps/chosen": -1.0547170639038086, |
|
"logps/rejected": -1.1250708103179932, |
|
"loss": 1.2627, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -2.109434127807617, |
|
"rewards/margins": 0.1407076120376587, |
|
"rewards/rejected": -2.2501416206359863, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.860230547550432, |
|
"grad_norm": 21.359517678769173, |
|
"learning_rate": 3.2998038176619e-10, |
|
"logits/chosen": -1.9776138067245483, |
|
"logits/rejected": -1.9692010879516602, |
|
"logps/chosen": -1.0567617416381836, |
|
"logps/rejected": -1.1803498268127441, |
|
"loss": 1.2064, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.113523483276367, |
|
"rewards/margins": 0.24717645347118378, |
|
"rewards/rejected": -2.3606996536254883, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.867435158501441, |
|
"grad_norm": 20.96219918565088, |
|
"learning_rate": 2.969043184133907e-10, |
|
"logits/chosen": -2.046151638031006, |
|
"logits/rejected": -2.044818639755249, |
|
"logps/chosen": -0.9711786508560181, |
|
"logps/rejected": -1.1876708269119263, |
|
"loss": 1.0771, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.9423573017120361, |
|
"rewards/margins": 0.43298429250717163, |
|
"rewards/rejected": -2.3753416538238525, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.8746397694524495, |
|
"grad_norm": 17.910920824523004, |
|
"learning_rate": 2.6556478068261447e-10, |
|
"logits/chosen": -1.9732444286346436, |
|
"logits/rejected": -1.9708236455917358, |
|
"logps/chosen": -0.9731259346008301, |
|
"logps/rejected": -1.1022650003433228, |
|
"loss": 1.2084, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9462518692016602, |
|
"rewards/margins": 0.2582783102989197, |
|
"rewards/rejected": -2.2045300006866455, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.881844380403458, |
|
"grad_norm": 20.734203195977276, |
|
"learning_rate": 2.3596397161395607e-10, |
|
"logits/chosen": -2.044921875, |
|
"logits/rejected": -2.0331034660339355, |
|
"logps/chosen": -1.0672800540924072, |
|
"logps/rejected": -1.2321101427078247, |
|
"loss": 1.1588, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.1345601081848145, |
|
"rewards/margins": 0.32965999841690063, |
|
"rewards/rejected": -2.4642202854156494, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.889048991354467, |
|
"grad_norm": 25.7016655841959, |
|
"learning_rate": 2.0810397202206399e-10, |
|
"logits/chosen": -1.9520553350448608, |
|
"logits/rejected": -1.9573888778686523, |
|
"logps/chosen": -1.063836693763733, |
|
"logps/rejected": -1.193362832069397, |
|
"loss": 1.1905, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.127673387527466, |
|
"rewards/margins": 0.2590521574020386, |
|
"rewards/rejected": -2.386725664138794, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.8962536023054755, |
|
"grad_norm": 22.599478343097772, |
|
"learning_rate": 1.819867403498737e-10, |
|
"logits/chosen": -2.0386157035827637, |
|
"logits/rejected": -2.036118984222412, |
|
"logps/chosen": -1.0686347484588623, |
|
"logps/rejected": -1.2007242441177368, |
|
"loss": 1.2018, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.1372694969177246, |
|
"rewards/margins": 0.26417914032936096, |
|
"rewards/rejected": -2.4014484882354736, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.903458213256484, |
|
"grad_norm": 21.70269511981427, |
|
"learning_rate": 1.5761411253092382e-10, |
|
"logits/chosen": -1.964998483657837, |
|
"logits/rejected": -1.9548912048339844, |
|
"logps/chosen": -0.9872833490371704, |
|
"logps/rejected": -1.1099205017089844, |
|
"loss": 1.1994, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.9745666980743408, |
|
"rewards/margins": 0.24527449905872345, |
|
"rewards/rejected": -2.2198410034179688, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.910662824207493, |
|
"grad_norm": 20.259335859045336, |
|
"learning_rate": 1.3498780186031455e-10, |
|
"logits/chosen": -2.010437488555908, |
|
"logits/rejected": -2.0069775581359863, |
|
"logps/chosen": -1.162232756614685, |
|
"logps/rejected": -1.281508207321167, |
|
"loss": 1.2266, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -2.32446551322937, |
|
"rewards/margins": 0.2385510504245758, |
|
"rewards/rejected": -2.563016414642334, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.9178674351585014, |
|
"grad_norm": 15.586122569686582, |
|
"learning_rate": 1.1410939887425141e-10, |
|
"logits/chosen": -1.9975839853286743, |
|
"logits/rejected": -1.9993999004364014, |
|
"logps/chosen": -1.045862078666687, |
|
"logps/rejected": -1.1738336086273193, |
|
"loss": 1.212, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -2.091724157333374, |
|
"rewards/margins": 0.25594305992126465, |
|
"rewards/rejected": -2.3476672172546387, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.92507204610951, |
|
"grad_norm": 18.548109162992386, |
|
"learning_rate": 9.498037123825686e-11, |
|
"logits/chosen": -2.0100975036621094, |
|
"logits/rejected": -2.0066418647766113, |
|
"logps/chosen": -1.0210684537887573, |
|
"logps/rejected": -1.1468260288238525, |
|
"loss": 1.1987, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.0421369075775146, |
|
"rewards/margins": 0.2515150308609009, |
|
"rewards/rejected": -2.293652057647705, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.9322766570605188, |
|
"grad_norm": 21.54852206068809, |
|
"learning_rate": 7.760206364398614e-11, |
|
"logits/chosen": -2.0660743713378906, |
|
"logits/rejected": -2.063163995742798, |
|
"logps/chosen": -1.0767936706542969, |
|
"logps/rejected": -1.2189406156539917, |
|
"loss": 1.1849, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.1535873413085938, |
|
"rewards/margins": 0.2842939794063568, |
|
"rewards/rejected": -2.4378812313079834, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.9394812680115274, |
|
"grad_norm": 21.178294648611878, |
|
"learning_rate": 6.19756977147029e-11, |
|
"logits/chosen": -1.9951989650726318, |
|
"logits/rejected": -1.9918142557144165, |
|
"logps/chosen": -1.02787446975708, |
|
"logps/rejected": -1.2345163822174072, |
|
"loss": 1.1095, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.05574893951416, |
|
"rewards/margins": 0.41328415274620056, |
|
"rewards/rejected": -2.4690327644348145, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.946685878962536, |
|
"grad_norm": 20.81149789203122, |
|
"learning_rate": 4.810237191940625e-11, |
|
"logits/chosen": -1.974111557006836, |
|
"logits/rejected": -1.9727462530136108, |
|
"logps/chosen": -1.0376461744308472, |
|
"logps/rejected": -1.1693501472473145, |
|
"loss": 1.217, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -2.0752923488616943, |
|
"rewards/margins": 0.2634081244468689, |
|
"rewards/rejected": -2.338700294494629, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.9538904899135447, |
|
"grad_norm": 20.108876799029805, |
|
"learning_rate": 3.5983061495617476e-11, |
|
"logits/chosen": -2.032691240310669, |
|
"logits/rejected": -2.0327444076538086, |
|
"logps/chosen": -1.1233651638031006, |
|
"logps/rejected": -1.2714459896087646, |
|
"loss": 1.1825, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.246730327606201, |
|
"rewards/margins": 0.2961619794368744, |
|
"rewards/rejected": -2.5428919792175293, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.9610951008645534, |
|
"grad_norm": 21.51546113795096, |
|
"learning_rate": 2.5618618380812694e-11, |
|
"logits/chosen": -2.0210888385772705, |
|
"logits/rejected": -2.0106148719787598, |
|
"logps/chosen": -1.002300500869751, |
|
"logps/rejected": -1.166154146194458, |
|
"loss": 1.1714, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.004601001739502, |
|
"rewards/margins": 0.32770711183547974, |
|
"rewards/rejected": -2.332308292388916, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.968299711815562, |
|
"grad_norm": 22.762605833671383, |
|
"learning_rate": 1.700977115254576e-11, |
|
"logits/chosen": -1.9953645467758179, |
|
"logits/rejected": -1.9921376705169678, |
|
"logps/chosen": -0.9968992471694946, |
|
"logps/rejected": -1.1455665826797485, |
|
"loss": 1.1674, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9937984943389893, |
|
"rewards/margins": 0.29733437299728394, |
|
"rewards/rejected": -2.291133165359497, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.9755043227665707, |
|
"grad_norm": 20.444100868277733, |
|
"learning_rate": 1.0157124977230868e-11, |
|
"logits/chosen": -1.9724935293197632, |
|
"logits/rejected": -1.9707790613174438, |
|
"logps/chosen": -0.9694275856018066, |
|
"logps/rejected": -1.117763876914978, |
|
"loss": 1.1687, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9388551712036133, |
|
"rewards/margins": 0.2966724932193756, |
|
"rewards/rejected": -2.235527753829956, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.9827089337175794, |
|
"grad_norm": 21.99215491997881, |
|
"learning_rate": 5.061161567596061e-12, |
|
"logits/chosen": -1.9936805963516235, |
|
"logits/rejected": -1.98941171169281, |
|
"logps/chosen": -1.0571701526641846, |
|
"logps/rejected": -1.1420987844467163, |
|
"loss": 1.2615, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.114340305328369, |
|
"rewards/margins": 0.16985730826854706, |
|
"rewards/rejected": -2.2841975688934326, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.989913544668588, |
|
"grad_norm": 20.878532080632212, |
|
"learning_rate": 1.7222391488297406e-12, |
|
"logits/chosen": -2.013947010040283, |
|
"logits/rejected": -2.010057210922241, |
|
"logps/chosen": -1.1070269346237183, |
|
"logps/rejected": -1.254369854927063, |
|
"loss": 1.1756, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.2140538692474365, |
|
"rewards/margins": 0.2946857511997223, |
|
"rewards/rejected": -2.508739709854126, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.9971181556195967, |
|
"grad_norm": 19.732213045865922, |
|
"learning_rate": 1.4059243338693238e-13, |
|
"logits/chosen": -1.9882125854492188, |
|
"logits/rejected": -1.9810755252838135, |
|
"logps/chosen": -1.059184193611145, |
|
"logps/rejected": -1.1826164722442627, |
|
"loss": 1.1942, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.11836838722229, |
|
"rewards/margins": 0.2468646764755249, |
|
"rewards/rejected": -2.3652329444885254, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 4164, |
|
"total_flos": 0.0, |
|
"train_loss": 1.2025116606473236, |
|
"train_runtime": 6278.9508, |
|
"train_samples_per_second": 10.608, |
|
"train_steps_per_second": 0.663 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4164, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|