{ "best_metric": 1.171636939048767, "best_model_checkpoint": "models/qwen2.5-3b-orpo-coarse/checkpoint-5000", "epoch": 0.9999620277197646, "eval_steps": 5000, "global_step": 13167, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007594456047085627, "grad_norm": 13.86628905671021, "learning_rate": 8e-07, "log_odds_chosen": 0.21778564155101776, "log_odds_ratio": -0.879101574420929, "logits/chosen": -0.852734386920929, "logits/rejected": -0.749804675579071, "logps/chosen": -1.7800781726837158, "logps/rejected": -1.9500000476837158, "loss": 1.9613, "nll_loss": 1.9699218273162842, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.17805175483226776, "rewards/margins": 0.01707763597369194, "rewards/rejected": -0.19501952826976776, "step": 10 }, { "epoch": 0.0015188912094171254, "grad_norm": 8.092628653220562, "learning_rate": 1.6e-06, "log_odds_chosen": 0.558544933795929, "log_odds_ratio": -0.636669933795929, "logits/chosen": -0.859179675579071, "logits/rejected": -0.6937500238418579, "logps/chosen": -1.542578101158142, "logps/rejected": -2.01171875, "loss": 1.9374, "nll_loss": 1.9265625476837158, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.15439453721046448, "rewards/margins": 0.04685363918542862, "rewards/rejected": -0.20114746689796448, "step": 20 }, { "epoch": 0.002278336814125688, "grad_norm": 6.246912317314842, "learning_rate": 2.4e-06, "log_odds_chosen": 0.592181384563446, "log_odds_ratio": -0.623242199420929, "logits/chosen": -0.8861328363418579, "logits/rejected": -0.7337890863418579, "logps/chosen": -1.291015625, "logps/rejected": -1.783203125, "loss": 1.687, "nll_loss": 1.516015648841858, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.12905272841453552, "rewards/margins": 0.04929962009191513, "rewards/rejected": -0.17829589545726776, "step": 30 }, { "epoch": 0.003037782418834251, "grad_norm": 3.26605061245338, "learning_rate": 3.2e-06, "log_odds_chosen": 0.4122558534145355, "log_odds_ratio": -0.7138671875, "logits/chosen": -0.876757800579071, "logits/rejected": -0.783398449420929, "logps/chosen": -1.330468773841858, "logps/rejected": -1.6613280773162842, "loss": 1.5582, "nll_loss": 1.4871094226837158, "rewards/accuracies": 0.65625, "rewards/chosen": -0.13308104872703552, "rewards/margins": 0.03313903883099556, "rewards/rejected": -0.1661376953125, "step": 40 }, { "epoch": 0.0037972280235428137, "grad_norm": 3.7689330842482973, "learning_rate": 4e-06, "log_odds_chosen": 0.28266602754592896, "log_odds_ratio": -0.699414074420929, "logits/chosen": -1.007226586341858, "logits/rejected": -0.8794921636581421, "logps/chosen": -1.223242163658142, "logps/rejected": -1.446679711341858, "loss": 1.4684, "nll_loss": 1.371484398841858, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.12238769233226776, "rewards/margins": 0.02219085767865181, "rewards/rejected": -0.14453125, "step": 50 }, { "epoch": 0.004556673628251376, "grad_norm": 3.658901977639421, "learning_rate": 4.8e-06, "log_odds_chosen": 0.4595703184604645, "log_odds_ratio": -0.633496105670929, "logits/chosen": -1.0146484375, "logits/rejected": -0.8607422113418579, "logps/chosen": -1.15625, "logps/rejected": -1.523828148841858, "loss": 1.4121, "nll_loss": 1.310156226158142, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.11564941704273224, "rewards/margins": 0.03672180324792862, "rewards/rejected": -0.1524658203125, "step": 60 }, { "epoch": 0.005316119232959939, "grad_norm": 3.4986780645399937, "learning_rate": 5.6e-06, "log_odds_chosen": 0.41401368379592896, "log_odds_ratio": -0.6317383050918579, "logits/chosen": -0.908007800579071, "logits/rejected": -0.790820300579071, "logps/chosen": -1.119726538658142, "logps/rejected": -1.448828101158142, "loss": 1.4158, "nll_loss": 1.39453125, "rewards/accuracies": 0.65625, "rewards/chosen": -0.11196289211511612, "rewards/margins": 0.03286438062787056, "rewards/rejected": -0.1448974609375, "step": 70 }, { "epoch": 0.006075564837668502, "grad_norm": 3.65265887165975, "learning_rate": 6.4e-06, "log_odds_chosen": 0.516162097454071, "log_odds_ratio": -0.6009765863418579, "logits/chosen": -0.9330078363418579, "logits/rejected": -0.7992187738418579, "logps/chosen": -1.078515648841858, "logps/rejected": -1.4921875, "loss": 1.3997, "nll_loss": 1.4269530773162842, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.10786132514476776, "rewards/margins": 0.04135436937212944, "rewards/rejected": -0.14914551377296448, "step": 80 }, { "epoch": 0.0068350104423770645, "grad_norm": 3.406581666947256, "learning_rate": 7.2e-06, "log_odds_chosen": 0.3199706971645355, "log_odds_ratio": -0.71533203125, "logits/chosen": -0.925585925579071, "logits/rejected": -0.806640625, "logps/chosen": -1.0402343273162842, "logps/rejected": -1.329492211341858, "loss": 1.3594, "nll_loss": 1.275390625, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10402832180261612, "rewards/margins": 0.02887267991900444, "rewards/rejected": -0.13303223252296448, "step": 90 }, { "epoch": 0.007594456047085627, "grad_norm": 3.5184077368926445, "learning_rate": 8e-06, "log_odds_chosen": 0.576733410358429, "log_odds_ratio": -0.5511718988418579, "logits/chosen": -1.021875023841858, "logits/rejected": -0.8369140625, "logps/chosen": -0.9267578125, "logps/rejected": -1.362695336341858, "loss": 1.302, "nll_loss": 1.2121093273162842, "rewards/accuracies": 0.65625, "rewards/chosen": -0.09260253608226776, "rewards/margins": 0.04358673095703125, "rewards/rejected": -0.13608399033546448, "step": 100 }, { "epoch": 0.00835390165179419, "grad_norm": 3.410860782832738, "learning_rate": 7.627700713964738e-06, "log_odds_chosen": 0.752978503704071, "log_odds_ratio": -0.4966796934604645, "logits/chosen": -1.0154297351837158, "logits/rejected": -0.8248046636581421, "logps/chosen": -0.9130859375, "logps/rejected": -1.4558594226837158, "loss": 1.2731, "nll_loss": 1.2335937023162842, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.09135742485523224, "rewards/margins": 0.0542144775390625, "rewards/rejected": -0.14560547471046448, "step": 110 }, { "epoch": 0.009113347256502752, "grad_norm": 2.154734358453151, "learning_rate": 7.3029674334022146e-06, "log_odds_chosen": 0.4745849668979645, "log_odds_ratio": -0.607617199420929, "logits/chosen": -0.926953136920929, "logits/rejected": -0.7845703363418579, "logps/chosen": -0.8677734136581421, "logps/rejected": -1.217382788658142, "loss": 1.2572, "nll_loss": 1.286523461341858, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08676757663488388, "rewards/margins": 0.03490447998046875, "rewards/rejected": -0.12165527045726776, "step": 120 }, { "epoch": 0.009872792861211316, "grad_norm": 1.6469850200192648, "learning_rate": 7.016464154456233e-06, "log_odds_chosen": 0.46943360567092896, "log_odds_ratio": -0.653027355670929, "logits/chosen": -0.958984375, "logits/rejected": -0.80859375, "logps/chosen": -0.875, "logps/rejected": -1.253320336341858, "loss": 1.2276, "nll_loss": 1.1544921398162842, "rewards/accuracies": 0.625, "rewards/chosen": -0.08754882961511612, "rewards/margins": 0.037818145006895065, "rewards/rejected": -0.12531737983226776, "step": 130 }, { "epoch": 0.010632238465919878, "grad_norm": 1.4545268658142032, "learning_rate": 6.7612340378281325e-06, "log_odds_chosen": 0.43743896484375, "log_odds_ratio": -0.641796886920929, "logits/chosen": -0.9302734136581421, "logits/rejected": -0.799609363079071, "logps/chosen": -0.8802734613418579, "logps/rejected": -1.219335913658142, "loss": 1.232, "nll_loss": 1.203515648841858, "rewards/accuracies": 0.625, "rewards/chosen": -0.0880126953125, "rewards/margins": 0.03392791748046875, "rewards/rejected": -0.1219482421875, "step": 140 }, { "epoch": 0.011391684070628441, "grad_norm": 1.5686155088472593, "learning_rate": 6.531972647421809e-06, "log_odds_chosen": 0.5901855230331421, "log_odds_ratio": -0.570996105670929, "logits/chosen": -0.970898449420929, "logits/rejected": -0.866015613079071, "logps/chosen": -0.8382812738418579, "logps/rejected": -1.291601538658142, "loss": 1.1925, "nll_loss": 1.1337890625, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.08380126953125, "rewards/margins": 0.045166015625, "rewards/rejected": -0.12905272841453552, "step": 150 }, { "epoch": 0.012151129675337003, "grad_norm": 1.6192219524800442, "learning_rate": 6.3245553203367575e-06, "log_odds_chosen": 0.502880871295929, "log_odds_ratio": -0.5879882574081421, "logits/chosen": -0.9671875238418579, "logits/rejected": -0.842968761920929, "logps/chosen": -0.8861328363418579, "logps/rejected": -1.231054663658142, "loss": 1.2332, "nll_loss": 1.1785156726837158, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.08859863132238388, "rewards/margins": 0.03454742580652237, "rewards/rejected": -0.12324218451976776, "step": 160 }, { "epoch": 0.012910575280045567, "grad_norm": 1.7262176390305861, "learning_rate": 6.135719910778963e-06, "log_odds_chosen": 0.6170409917831421, "log_odds_ratio": -0.559863269329071, "logits/chosen": -0.988476574420929, "logits/rejected": -0.8384765386581421, "logps/chosen": -0.8173828125, "logps/rejected": -1.252343773841858, "loss": 1.1904, "nll_loss": 1.1330077648162842, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08175048977136612, "rewards/margins": 0.04349059984087944, "rewards/rejected": -0.125244140625, "step": 170 }, { "epoch": 0.013670020884754129, "grad_norm": 1.4528535157038984, "learning_rate": 5.962847939999439e-06, "log_odds_chosen": 0.641040027141571, "log_odds_ratio": -0.558349609375, "logits/chosen": -0.9375, "logits/rejected": -0.808789074420929, "logps/chosen": -0.824414074420929, "logps/rejected": -1.287695288658142, "loss": 1.1938, "nll_loss": 1.1994140148162842, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08251953125, "rewards/margins": 0.04627075046300888, "rewards/rejected": -0.12874755263328552, "step": 180 }, { "epoch": 0.014429466489462693, "grad_norm": 1.5571125621656106, "learning_rate": 5.803810000880094e-06, "log_odds_chosen": 0.666943371295929, "log_odds_ratio": -0.548632800579071, "logits/chosen": -0.958789050579071, "logits/rejected": -0.7939453125, "logps/chosen": -0.8203125, "logps/rejected": -1.3058593273162842, "loss": 1.1816, "nll_loss": 1.134765625, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08206786960363388, "rewards/margins": 0.04870147630572319, "rewards/rejected": -0.13076171278953552, "step": 190 }, { "epoch": 0.015188912094171255, "grad_norm": 1.4049284198953822, "learning_rate": 5.65685424949238e-06, "log_odds_chosen": 0.554760754108429, "log_odds_ratio": -0.5804687738418579, "logits/chosen": -0.952929675579071, "logits/rejected": -0.835742175579071, "logps/chosen": -0.8212890625, "logps/rejected": -1.1980469226837158, "loss": 1.2031, "nll_loss": 1.18359375, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.08212890475988388, "rewards/margins": 0.03766479343175888, "rewards/rejected": -0.11979980766773224, "step": 200 }, { "epoch": 0.01594835769887982, "grad_norm": 1.4566183530281651, "learning_rate": 5.5205244747388325e-06, "log_odds_chosen": 0.6102050542831421, "log_odds_ratio": -0.575390636920929, "logits/chosen": -0.9644531011581421, "logits/rejected": -0.8070312738418579, "logps/chosen": -0.8148437738418579, "logps/rejected": -1.2580077648162842, "loss": 1.2038, "nll_loss": 1.204687476158142, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.08154296875, "rewards/margins": 0.04420166090130806, "rewards/rejected": -0.12565918266773224, "step": 210 }, { "epoch": 0.01670780330358838, "grad_norm": 1.3317347692118253, "learning_rate": 5.393598899705936e-06, "log_odds_chosen": 0.46563720703125, "log_odds_ratio": -0.63330078125, "logits/chosen": -0.972460925579071, "logits/rejected": -0.8589843511581421, "logps/chosen": -0.8246093988418579, "logps/rejected": -1.1642577648162842, "loss": 1.1689, "nll_loss": 1.149999976158142, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.0823974609375, "rewards/margins": 0.03399963304400444, "rewards/rejected": -0.1163330078125, "step": 220 }, { "epoch": 0.017467248908296942, "grad_norm": 1.181139702799225, "learning_rate": 5.275043787166296e-06, "log_odds_chosen": 0.577014148235321, "log_odds_ratio": -0.596875011920929, "logits/chosen": -0.9917968511581421, "logits/rejected": -0.8720703125, "logps/chosen": -0.8345702886581421, "logps/rejected": -1.2371094226837158, "loss": 1.2061, "nll_loss": 1.1945312023162842, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.08342285454273224, "rewards/margins": 0.04030304029583931, "rewards/rejected": -0.12380371242761612, "step": 230 }, { "epoch": 0.018226694513005504, "grad_norm": 1.200048614697199, "learning_rate": 5.163977794943223e-06, "log_odds_chosen": 0.598925769329071, "log_odds_ratio": -0.58056640625, "logits/chosen": -1.0380859375, "logits/rejected": -0.860156238079071, "logps/chosen": -0.824999988079071, "logps/rejected": -1.2322266101837158, "loss": 1.179, "nll_loss": 1.135351538658142, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.0823974609375, "rewards/margins": 0.04070129245519638, "rewards/rejected": -0.12324218451976776, "step": 240 }, { "epoch": 0.01898614011771407, "grad_norm": 1.1762959943133902, "learning_rate": 5.059644256269407e-06, "log_odds_chosen": 0.51861572265625, "log_odds_ratio": -0.6231445074081421, "logits/chosen": -0.990234375, "logits/rejected": -0.849414050579071, "logps/chosen": -0.809374988079071, "logps/rejected": -1.1974608898162842, "loss": 1.1635, "nll_loss": 1.1248047351837158, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.08089599758386612, "rewards/margins": 0.03888855129480362, "rewards/rejected": -0.11982421576976776, "step": 250 }, { "epoch": 0.01974558572242263, "grad_norm": 1.1603321265967979, "learning_rate": 4.961389383568338e-06, "log_odds_chosen": 0.639025866985321, "log_odds_ratio": -0.5658203363418579, "logits/chosen": -0.9554687738418579, "logits/rejected": -0.811718761920929, "logps/chosen": -0.8099609613418579, "logps/rejected": -1.266992211341858, "loss": 1.191, "nll_loss": 1.200781226158142, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0809326171875, "rewards/margins": 0.04570770263671875, "rewards/rejected": -0.12666015326976776, "step": 260 }, { "epoch": 0.020505031327131194, "grad_norm": 1.1254409717049783, "learning_rate": 4.8686449556014755e-06, "log_odds_chosen": 0.6888672113418579, "log_odds_ratio": -0.5379883050918579, "logits/chosen": -0.9378906488418579, "logits/rejected": -0.772265613079071, "logps/chosen": -0.8062499761581421, "logps/rejected": -1.298437476158142, "loss": 1.1908, "nll_loss": 1.240820288658142, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08056640625, "rewards/margins": 0.04934387281537056, "rewards/rejected": -0.12983398139476776, "step": 270 }, { "epoch": 0.021264476931839756, "grad_norm": 1.5887420167810924, "learning_rate": 4.780914437337574e-06, "log_odds_chosen": 0.78173828125, "log_odds_ratio": -0.5516601800918579, "logits/chosen": -1.0041015148162842, "logits/rejected": -0.875781238079071, "logps/chosen": -0.789843738079071, "logps/rejected": -1.3533203601837158, "loss": 1.1471, "nll_loss": 1.066796898841858, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.07894287258386612, "rewards/margins": 0.05640258640050888, "rewards/rejected": -0.13527831435203552, "step": 280 }, { "epoch": 0.02202392253654832, "grad_norm": 1.2864595824780236, "learning_rate": 4.697761756117627e-06, "log_odds_chosen": 0.586669921875, "log_odds_ratio": -0.6107422113418579, "logits/chosen": -1.027734398841858, "logits/rejected": -0.8792968988418579, "logps/chosen": -0.8570312261581421, "logps/rejected": -1.269140601158142, "loss": 1.1837, "nll_loss": 1.2062499523162842, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08576659858226776, "rewards/margins": 0.0410919189453125, "rewards/rejected": -0.12687988579273224, "step": 290 }, { "epoch": 0.022783368141256883, "grad_norm": 1.2698446440524298, "learning_rate": 4.618802153517006e-06, "log_odds_chosen": 0.615771472454071, "log_odds_ratio": -0.567089855670929, "logits/chosen": -1.0138671398162842, "logits/rejected": -0.8890625238418579, "logps/chosen": -0.793164074420929, "logps/rejected": -1.229882836341858, "loss": 1.1641, "nll_loss": 1.1337890625, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.07933349907398224, "rewards/margins": 0.04369049146771431, "rewards/rejected": -0.12302245944738388, "step": 300 }, { "epoch": 0.023542813745965445, "grad_norm": 1.1172910062288641, "learning_rate": 4.543694673976518e-06, "log_odds_chosen": 0.63641357421875, "log_odds_ratio": -0.564208984375, "logits/chosen": -0.949999988079071, "logits/rejected": -0.814648449420929, "logps/chosen": -0.815625011920929, "logps/rejected": -1.2546875476837158, "loss": 1.1649, "nll_loss": 1.1271483898162842, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.08155517280101776, "rewards/margins": 0.04389037936925888, "rewards/rejected": -0.12548828125, "step": 310 }, { "epoch": 0.024302259350674007, "grad_norm": 1.4012717628726807, "learning_rate": 4.472135954999579e-06, "log_odds_chosen": 0.576708972454071, "log_odds_ratio": -0.581347644329071, "logits/chosen": -0.9892578125, "logits/rejected": -0.8013671636581421, "logps/chosen": -0.8212890625, "logps/rejected": -1.2166016101837158, "loss": 1.165, "nll_loss": 1.1007812023162842, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08208008110523224, "rewards/margins": 0.03956909105181694, "rewards/rejected": -0.12155761569738388, "step": 320 }, { "epoch": 0.025061704955382572, "grad_norm": 1.2471549753648214, "learning_rate": 4.403855060505443e-06, "log_odds_chosen": 0.7884277105331421, "log_odds_ratio": -0.5328124761581421, "logits/chosen": -1.0359375476837158, "logits/rejected": -0.827929675579071, "logps/chosen": -0.768359363079071, "logps/rejected": -1.326562523841858, "loss": 1.1587, "nll_loss": 1.140625, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07684326171875, "rewards/margins": 0.05566711351275444, "rewards/rejected": -0.13254395127296448, "step": 330 }, { "epoch": 0.025821150560091134, "grad_norm": 1.1704761961574872, "learning_rate": 4.338609156373123e-06, "log_odds_chosen": 0.820385754108429, "log_odds_ratio": -0.500439465045929, "logits/chosen": -0.9710937738418579, "logits/rejected": -0.8115234375, "logps/chosen": -0.7509765625, "logps/rejected": -1.329687476158142, "loss": 1.1569, "nll_loss": 1.087304711341858, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07510986179113388, "rewards/margins": 0.05777435377240181, "rewards/rejected": -0.1329345703125, "step": 340 }, { "epoch": 0.026580596164799696, "grad_norm": 1.2426327441185534, "learning_rate": 4.27617987059879e-06, "log_odds_chosen": 0.7613891363143921, "log_odds_ratio": -0.523730456829071, "logits/chosen": -1.052148461341858, "logits/rejected": -0.8671875, "logps/chosen": -0.7865234613418579, "logps/rejected": -1.3214843273162842, "loss": 1.1376, "nll_loss": 1.100976586341858, "rewards/accuracies": 0.75, "rewards/chosen": -0.07866211235523224, "rewards/margins": 0.053466796875, "rewards/rejected": -0.13227538764476776, "step": 350 }, { "epoch": 0.027340041769508258, "grad_norm": 1.1826530531996458, "learning_rate": 4.216370213557839e-06, "log_odds_chosen": 0.747509777545929, "log_odds_ratio": -0.510058581829071, "logits/chosen": -1.000585913658142, "logits/rejected": -0.8384765386581421, "logps/chosen": -0.758984386920929, "logps/rejected": -1.270117163658142, "loss": 1.1798, "nll_loss": 1.0333983898162842, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07584228366613388, "rewards/margins": 0.05108032375574112, "rewards/rejected": -0.126953125, "step": 360 }, { "epoch": 0.02809948737421682, "grad_norm": 1.2614454515858224, "learning_rate": 4.15900195928029e-06, "log_odds_chosen": 0.771191418170929, "log_odds_ratio": -0.512011706829071, "logits/chosen": -0.9599609375, "logits/rejected": -0.8255859613418579, "logps/chosen": -0.764843761920929, "logps/rejected": -1.2931640148162842, "loss": 1.1381, "nll_loss": 1.121679663658142, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.07650146633386612, "rewards/margins": 0.05285034328699112, "rewards/rejected": -0.12932129204273224, "step": 370 }, { "epoch": 0.028858932978925386, "grad_norm": 1.235335063867129, "learning_rate": 4.103913408340617e-06, "log_odds_chosen": 0.6644653081893921, "log_odds_ratio": -0.5732421875, "logits/chosen": -1.005859375, "logits/rejected": -0.8218749761581421, "logps/chosen": -0.760546863079071, "logps/rejected": -1.247656226158142, "loss": 1.1545, "nll_loss": 1.025390625, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0760498046875, "rewards/margins": 0.04880828782916069, "rewards/rejected": -0.124755859375, "step": 380 }, { "epoch": 0.029618378583633947, "grad_norm": 2.2602118601651635, "learning_rate": 4.050957468334666e-06, "log_odds_chosen": 0.6941894292831421, "log_odds_ratio": -0.556640625, "logits/chosen": -0.960156261920929, "logits/rejected": -0.8388671875, "logps/chosen": -0.798632800579071, "logps/rejected": -1.298437476158142, "loss": 1.145, "nll_loss": 1.136328101158142, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07996825873851776, "rewards/margins": 0.0499725341796875, "rewards/rejected": -0.12993164360523224, "step": 390 }, { "epoch": 0.03037782418834251, "grad_norm": 1.2337628354106855, "learning_rate": 4e-06, "log_odds_chosen": 0.8100951910018921, "log_odds_ratio": -0.501757800579071, "logits/chosen": -1.0193359851837158, "logits/rejected": -0.8480468988418579, "logps/chosen": -0.7603515386581421, "logps/rejected": -1.2986328601837158, "loss": 1.154, "nll_loss": 1.0320312976837158, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07602538913488388, "rewards/margins": 0.053879547864198685, "rewards/rejected": -0.12987060844898224, "step": 400 }, { "epoch": 0.03113726979305107, "grad_norm": 1.1395837108161198, "learning_rate": 3.950918386598359e-06, "log_odds_chosen": 0.549243152141571, "log_odds_ratio": -0.6294921636581421, "logits/chosen": -0.9613281488418579, "logits/rejected": -0.835156261920929, "logps/chosen": -0.7978515625, "logps/rejected": -1.1808593273162842, "loss": 1.1498, "nll_loss": 1.1248047351837158, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07978515326976776, "rewards/margins": 0.03834228590130806, "rewards/rejected": -0.11807861179113388, "step": 410 }, { "epoch": 0.03189671539775964, "grad_norm": 1.371040522468586, "learning_rate": 3.903600291794132e-06, "log_odds_chosen": 0.862231433391571, "log_odds_ratio": -0.47392576932907104, "logits/chosen": -1.010351538658142, "logits/rejected": -0.819140613079071, "logps/chosen": -0.7408202886581421, "logps/rejected": -1.3115234375, "loss": 1.1793, "nll_loss": 1.1121094226837158, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07404784858226776, "rewards/margins": 0.05706787109375, "rewards/rejected": -0.13115234673023224, "step": 420 }, { "epoch": 0.032656161002468195, "grad_norm": 1.2674091700636376, "learning_rate": 3.857942577363297e-06, "log_odds_chosen": 0.4953247010707855, "log_odds_ratio": -0.6182616949081421, "logits/chosen": -0.9994140863418579, "logits/rejected": -0.8949218988418579, "logps/chosen": -0.85546875, "logps/rejected": -1.190039038658142, "loss": 1.1612, "nll_loss": 1.1115233898162842, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.08547363430261612, "rewards/margins": 0.03353118896484375, "rewards/rejected": -0.11904297024011612, "step": 430 }, { "epoch": 0.03341560660717676, "grad_norm": 1.294937616946804, "learning_rate": 3.813850356982369e-06, "log_odds_chosen": 0.824511706829071, "log_odds_ratio": -0.4951171875, "logits/chosen": -1.025390625, "logits/rejected": -0.885546863079071, "logps/chosen": -0.7835937738418579, "logps/rejected": -1.329492211341858, "loss": 1.1386, "nll_loss": 1.127539038658142, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07833252102136612, "rewards/margins": 0.05462799221277237, "rewards/rejected": -0.1329345703125, "step": 440 }, { "epoch": 0.034175052211885326, "grad_norm": 1.262958483549515, "learning_rate": 3.7712361663282537e-06, "log_odds_chosen": 0.66766357421875, "log_odds_ratio": -0.5707031488418579, "logits/chosen": -1.025781273841858, "logits/rejected": -0.8939453363418579, "logps/chosen": -0.849804699420929, "logps/rejected": -1.3191406726837158, "loss": 1.1549, "nll_loss": 1.1142578125, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0849609375, "rewards/margins": 0.046905517578125, "rewards/rejected": -0.13186034560203552, "step": 450 }, { "epoch": 0.034934497816593885, "grad_norm": 1.2612182491383581, "learning_rate": 3.730019232961255e-06, "log_odds_chosen": 0.678662121295929, "log_odds_ratio": -0.550000011920929, "logits/chosen": -0.972460925579071, "logits/rejected": -0.829296886920929, "logps/chosen": -0.8160156011581421, "logps/rejected": -1.276953101158142, "loss": 1.1477, "nll_loss": 1.0916016101837158, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.08156738430261612, "rewards/margins": 0.04622802883386612, "rewards/rejected": -0.12790527939796448, "step": 460 }, { "epoch": 0.03569394342130245, "grad_norm": 1.1867499674284254, "learning_rate": 3.6901248321155403e-06, "log_odds_chosen": 0.6483154296875, "log_odds_ratio": -0.546582043170929, "logits/chosen": -1.040624976158142, "logits/rejected": -0.844921886920929, "logps/chosen": -0.824023425579071, "logps/rejected": -1.2683594226837158, "loss": 1.1355, "nll_loss": 1.1193358898162842, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08234862983226776, "rewards/margins": 0.04449005052447319, "rewards/rejected": -0.1268310546875, "step": 470 }, { "epoch": 0.03645338902601101, "grad_norm": 1.2917386086257876, "learning_rate": 3.6514837167011073e-06, "log_odds_chosen": 0.6243896484375, "log_odds_ratio": -0.6068359613418579, "logits/chosen": -0.9541015625, "logits/rejected": -0.8687499761581421, "logps/chosen": -0.812695324420929, "logps/rejected": -1.240820288658142, "loss": 1.1424, "nll_loss": 1.130273461341858, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.08131103217601776, "rewards/margins": 0.04276428371667862, "rewards/rejected": -0.1240234375, "step": 480 }, { "epoch": 0.037212834630719574, "grad_norm": 1.1186491161024104, "learning_rate": 3.6140316116210052e-06, "log_odds_chosen": 0.64251708984375, "log_odds_ratio": -0.570996105670929, "logits/chosen": -1.0304687023162842, "logits/rejected": -0.8837890625, "logps/chosen": -0.7513672113418579, "logps/rejected": -1.1941406726837158, "loss": 1.1313, "nll_loss": 1.1144530773162842, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.07512207329273224, "rewards/margins": 0.04434509202837944, "rewards/rejected": -0.11948242038488388, "step": 490 }, { "epoch": 0.03797228023542814, "grad_norm": 1.3019471686514312, "learning_rate": 3.5777087639996634e-06, "log_odds_chosen": 0.56317138671875, "log_odds_ratio": -0.609082043170929, "logits/chosen": -0.9876953363418579, "logits/rejected": -0.8755859136581421, "logps/chosen": -0.77685546875, "logps/rejected": -1.1484375, "loss": 1.1368, "nll_loss": 1.059179663658142, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07772216945886612, "rewards/margins": 0.03708953782916069, "rewards/rejected": -0.11479492485523224, "step": 500 }, { "epoch": 0.0387317258401367, "grad_norm": 1.253322759788317, "learning_rate": 3.5424595421603814e-06, "log_odds_chosen": 1.019140601158142, "log_odds_ratio": -0.46337890625, "logits/chosen": -1.047265648841858, "logits/rejected": -0.903124988079071, "logps/chosen": -0.778515636920929, "logps/rejected": -1.500585913658142, "loss": 1.1401, "nll_loss": 1.0109374523162842, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07789306342601776, "rewards/margins": 0.07218017429113388, "rewards/rejected": -0.15019531548023224, "step": 510 }, { "epoch": 0.03949117144484526, "grad_norm": 1.1806383392582152, "learning_rate": 3.5082320772281165e-06, "log_odds_chosen": 0.7989746332168579, "log_odds_ratio": -0.5084472894668579, "logits/chosen": -1.041015625, "logits/rejected": -0.871874988079071, "logps/chosen": -0.8121093511581421, "logps/rejected": -1.358789086341858, "loss": 1.1451, "nll_loss": 1.1056640148162842, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.08123779296875, "rewards/margins": 0.05465088039636612, "rewards/rejected": -0.1358642578125, "step": 520 }, { "epoch": 0.04025061704955383, "grad_norm": 1.1625019947969621, "learning_rate": 3.474977942104555e-06, "log_odds_chosen": 0.8888183832168579, "log_odds_ratio": -0.5042968988418579, "logits/chosen": -1.0607421398162842, "logits/rejected": -0.885937511920929, "logps/chosen": -0.748828113079071, "logps/rejected": -1.3966796398162842, "loss": 1.1281, "nll_loss": 1.0353515148162842, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07485351711511612, "rewards/margins": 0.06481017917394638, "rewards/rejected": -0.13974609971046448, "step": 530 }, { "epoch": 0.04101006265426239, "grad_norm": 4.594951428570033, "learning_rate": 3.442651863295481e-06, "log_odds_chosen": 0.5628417730331421, "log_odds_ratio": -0.6055663824081421, "logits/chosen": -1.051367163658142, "logits/rejected": -0.8960937261581421, "logps/chosen": -0.8080078363418579, "logps/rejected": -1.224218726158142, "loss": 1.1385, "nll_loss": 1.1033203601837158, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.08078613132238388, "rewards/margins": 0.04152526706457138, "rewards/rejected": -0.12236328423023224, "step": 540 }, { "epoch": 0.04176950825897095, "grad_norm": 1.2090024890197002, "learning_rate": 3.4112114616897665e-06, "log_odds_chosen": 0.5767822265625, "log_odds_ratio": -0.570507824420929, "logits/chosen": -1.0291016101837158, "logits/rejected": -0.8828125, "logps/chosen": -0.8072265386581421, "logps/rejected": -1.181640625, "loss": 1.1039, "nll_loss": 1.0779297351837158, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0806884765625, "rewards/margins": 0.03731689602136612, "rewards/rejected": -0.11806640774011612, "step": 550 }, { "epoch": 0.04252895386367951, "grad_norm": 1.1594339920121948, "learning_rate": 3.3806170189140663e-06, "log_odds_chosen": 0.5506347417831421, "log_odds_ratio": -0.6226562261581421, "logits/chosen": -1.0158202648162842, "logits/rejected": -0.8511718511581421, "logps/chosen": -0.823437511920929, "logps/rejected": -1.191015601158142, "loss": 1.1467, "nll_loss": 1.0554687976837158, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.08237304538488388, "rewards/margins": 0.0366668701171875, "rewards/rejected": -0.11896972358226776, "step": 560 }, { "epoch": 0.04328839946838808, "grad_norm": 1.201461881697989, "learning_rate": 3.350831266333564e-06, "log_odds_chosen": 0.897265613079071, "log_odds_ratio": -0.4810546934604645, "logits/chosen": -1.0353515148162842, "logits/rejected": -0.893359363079071, "logps/chosen": -0.724804699420929, "logps/rejected": -1.335351586341858, "loss": 1.1069, "nll_loss": 0.998046875, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.0723876953125, "rewards/margins": 0.06114501878619194, "rewards/rejected": -0.13352051377296448, "step": 570 }, { "epoch": 0.04404784507309664, "grad_norm": 1.1413068930196963, "learning_rate": 3.3218191941495984e-06, "log_odds_chosen": 0.873828113079071, "log_odds_ratio": -0.4803710877895355, "logits/chosen": -1.0652344226837158, "logits/rejected": -0.873828113079071, "logps/chosen": -0.767578125, "logps/rejected": -1.3466796875, "loss": 1.132, "nll_loss": 1.029687523841858, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07666015625, "rewards/margins": 0.05785827711224556, "rewards/rejected": -0.13461914658546448, "step": 580 }, { "epoch": 0.0448072906778052, "grad_norm": 1.5485383373271366, "learning_rate": 3.293547878370473e-06, "log_odds_chosen": 0.768261730670929, "log_odds_ratio": -0.498046875, "logits/chosen": -1.0857422351837158, "logits/rejected": -0.8833984136581421, "logps/chosen": -0.757031261920929, "logps/rejected": -1.2986328601837158, "loss": 1.1077, "nll_loss": 1.0673828125, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07565917819738388, "rewards/margins": 0.05410461500287056, "rewards/rejected": -0.12980957329273224, "step": 590 }, { "epoch": 0.045566736282513766, "grad_norm": 1.1073500826019294, "learning_rate": 3.2659863237109044e-06, "log_odds_chosen": 0.7096191644668579, "log_odds_ratio": -0.558300793170929, "logits/chosen": -1.0310547351837158, "logits/rejected": -0.8984375, "logps/chosen": -0.7544921636581421, "logps/rejected": -1.236914038658142, "loss": 1.0924, "nll_loss": 1.029296875, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07546386867761612, "rewards/margins": 0.04828491061925888, "rewards/rejected": -0.12373046576976776, "step": 600 }, { "epoch": 0.046326181887222324, "grad_norm": 1.173689895403346, "learning_rate": 3.239105320715664e-06, "log_odds_chosen": 0.882128894329071, "log_odds_ratio": -0.508105456829071, "logits/chosen": -1.0304687023162842, "logits/rejected": -0.8775390386581421, "logps/chosen": -0.7875000238418579, "logps/rejected": -1.4249999523162842, "loss": 1.1141, "nll_loss": 1.0671875476837158, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07883300632238388, "rewards/margins": 0.06378784030675888, "rewards/rejected": -0.142578125, "step": 610 }, { "epoch": 0.04708562749193089, "grad_norm": 1.2719702097449104, "learning_rate": 3.2128773156099956e-06, "log_odds_chosen": 0.804492175579071, "log_odds_ratio": -0.5125976800918579, "logits/chosen": -1.1082031726837158, "logits/rejected": -0.9144531488418579, "logps/chosen": -0.695117175579071, "logps/rejected": -1.204492211341858, "loss": 1.1393, "nll_loss": 0.999218761920929, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.069580078125, "rewards/margins": 0.05094604566693306, "rewards/rejected": -0.12045898288488388, "step": 620 }, { "epoch": 0.047845073096639455, "grad_norm": 1.29621095727597, "learning_rate": 3.187276291558383e-06, "log_odds_chosen": 0.872851550579071, "log_odds_ratio": -0.4959960877895355, "logits/chosen": -1.0646483898162842, "logits/rejected": -0.876757800579071, "logps/chosen": -0.741992175579071, "logps/rejected": -1.3273437023162842, "loss": 1.1146, "nll_loss": 1.020117163658142, "rewards/accuracies": 0.75, "rewards/chosen": -0.07418213039636612, "rewards/margins": 0.05853271484375, "rewards/rejected": -0.13271483778953552, "step": 630 }, { "epoch": 0.048604518701348014, "grad_norm": 1.1926581413202013, "learning_rate": 3.1622776601683788e-06, "log_odds_chosen": 0.6888672113418579, "log_odds_ratio": -0.5728515386581421, "logits/chosen": -1.026953101158142, "logits/rejected": -0.905078113079071, "logps/chosen": -0.76171875, "logps/rejected": -1.2458984851837158, "loss": 1.1222, "nll_loss": 1.0060546398162842, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07608642429113388, "rewards/margins": 0.048476409167051315, "rewards/rejected": -0.1246337890625, "step": 640 }, { "epoch": 0.04936396430605658, "grad_norm": 1.2289174428389282, "learning_rate": 3.1378581622109444e-06, "log_odds_chosen": 0.8836914300918579, "log_odds_ratio": -0.5482422113418579, "logits/chosen": -0.9775390625, "logits/rejected": -0.892382800579071, "logps/chosen": -0.7763671875, "logps/rejected": -1.4152343273162842, "loss": 1.1386, "nll_loss": 1.0642578601837158, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07767333835363388, "rewards/margins": 0.0639495849609375, "rewards/rejected": -0.14155273139476776, "step": 650 }, { "epoch": 0.050123409910765145, "grad_norm": 1.1786820747359983, "learning_rate": 3.113995776646092e-06, "log_odds_chosen": 0.7305663824081421, "log_odds_ratio": -0.514843761920929, "logits/chosen": -1.1134765148162842, "logits/rejected": -0.9130859375, "logps/chosen": -0.765625, "logps/rejected": -1.269921898841858, "loss": 1.1478, "nll_loss": 1.0822265148162842, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07652588188648224, "rewards/margins": 0.05049438402056694, "rewards/rejected": -0.12697753310203552, "step": 660 }, { "epoch": 0.0508828555154737, "grad_norm": 1.3312347225655792, "learning_rate": 3.090669637145023e-06, "log_odds_chosen": 1.0925781726837158, "log_odds_ratio": -0.45654296875, "logits/chosen": -1.0128905773162842, "logits/rejected": -0.875195324420929, "logps/chosen": -0.720703125, "logps/rejected": -1.4699218273162842, "loss": 1.1142, "nll_loss": 0.9994140863418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07207031548023224, "rewards/margins": 0.07487793266773224, "rewards/rejected": -0.14702148735523224, "step": 670 }, { "epoch": 0.05164230112018227, "grad_norm": 1.1948117321314018, "learning_rate": 3.0678599553894814e-06, "log_odds_chosen": 1.125707983970642, "log_odds_ratio": -0.4688476622104645, "logits/chosen": -0.9761718511581421, "logits/rejected": -0.875, "logps/chosen": -0.7171875238418579, "logps/rejected": -1.5378906726837158, "loss": 1.1222, "nll_loss": 1.081445336341858, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07166747748851776, "rewards/margins": 0.08215637505054474, "rewards/rejected": -0.15380859375, "step": 680 }, { "epoch": 0.05240174672489083, "grad_norm": 10.568787243834047, "learning_rate": 3.0455479505075235e-06, "log_odds_chosen": 0.9056640863418579, "log_odds_ratio": -0.4808593690395355, "logits/chosen": -0.9888671636581421, "logits/rejected": -0.841015636920929, "logps/chosen": -0.72900390625, "logps/rejected": -1.308007836341858, "loss": 1.1102, "nll_loss": 1.0361328125, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07290039211511612, "rewards/margins": 0.05789489671587944, "rewards/rejected": -0.13071289658546448, "step": 690 }, { "epoch": 0.05316119232959939, "grad_norm": 1.286266032169233, "learning_rate": 3.0237157840738173e-06, "log_odds_chosen": 1.061425805091858, "log_odds_ratio": -0.42558592557907104, "logits/chosen": -0.9990234375, "logits/rejected": -0.8382812738418579, "logps/chosen": -0.7181640863418579, "logps/rejected": -1.441796898841858, "loss": 1.1047, "nll_loss": 1.04296875, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07183837890625, "rewards/margins": 0.07239379733800888, "rewards/rejected": -0.1441650390625, "step": 700 }, { "epoch": 0.05392063793430796, "grad_norm": 1.1911411718677432, "learning_rate": 3.002346500163206e-06, "log_odds_chosen": 0.734130859375, "log_odds_ratio": -0.561230480670929, "logits/chosen": -0.9830077886581421, "logits/rejected": -0.838085949420929, "logps/chosen": -0.781933605670929, "logps/rejected": -1.2683594226837158, "loss": 1.1144, "nll_loss": 1.136132836341858, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07828368991613388, "rewards/margins": 0.04859314113855362, "rewards/rejected": -0.12685546278953552, "step": 710 }, { "epoch": 0.054680083539016516, "grad_norm": 1.1480290061260026, "learning_rate": 2.9814239699997195e-06, "log_odds_chosen": 0.8654540777206421, "log_odds_ratio": -0.46533203125, "logits/chosen": -0.9710937738418579, "logits/rejected": -0.8296874761581421, "logps/chosen": -0.744921863079071, "logps/rejected": -1.3427734375, "loss": 1.0946, "nll_loss": 1.001367211341858, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07448730617761612, "rewards/margins": 0.05968017503619194, "rewards/rejected": -0.1341552734375, "step": 720 }, { "epoch": 0.05543952914372508, "grad_norm": 1.1770607358773444, "learning_rate": 2.9609328407904207e-06, "log_odds_chosen": 0.6666625738143921, "log_odds_ratio": -0.562695324420929, "logits/chosen": -0.984570324420929, "logits/rejected": -0.8382812738418579, "logps/chosen": -0.769335925579071, "logps/rejected": -1.2265625, "loss": 1.1379, "nll_loss": 1.077539086341858, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.07695312798023224, "rewards/margins": 0.04564361646771431, "rewards/rejected": -0.12263183295726776, "step": 730 }, { "epoch": 0.05619897474843364, "grad_norm": 1.2044886672236956, "learning_rate": 2.940858488375231e-06, "log_odds_chosen": 0.9058593511581421, "log_odds_ratio": -0.47246092557907104, "logits/chosen": -1.0974609851837158, "logits/rejected": -0.8525390625, "logps/chosen": -0.757617175579071, "logps/rejected": -1.4001953601837158, "loss": 1.0875, "nll_loss": 1.0167968273162842, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07573242485523224, "rewards/margins": 0.064239501953125, "rewards/rejected": -0.1400146484375, "step": 740 }, { "epoch": 0.056958420353142206, "grad_norm": 1.1899498766733454, "learning_rate": 2.9211869733608857e-06, "log_odds_chosen": 0.71484375, "log_odds_ratio": -0.5440429449081421, "logits/chosen": -0.953906238079071, "logits/rejected": -0.838085949420929, "logps/chosen": -0.7911132574081421, "logps/rejected": -1.253320336341858, "loss": 1.0996, "nll_loss": 1.1291015148162842, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07907714694738388, "rewards/margins": 0.04622955247759819, "rewards/rejected": -0.12529297173023224, "step": 750 }, { "epoch": 0.05771786595785077, "grad_norm": 1.2093900056856193, "learning_rate": 2.901905000440047e-06, "log_odds_chosen": 0.7349853515625, "log_odds_ratio": -0.5418456792831421, "logits/chosen": -1.0662109851837158, "logits/rejected": -0.904492199420929, "logps/chosen": -0.77099609375, "logps/rejected": -1.2302734851837158, "loss": 1.1262, "nll_loss": 1.0158202648162842, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07707519829273224, "rewards/margins": 0.045867919921875, "rewards/rejected": -0.12297363579273224, "step": 760 }, { "epoch": 0.05847731156255933, "grad_norm": 1.1104903118561136, "learning_rate": 2.8829998806257885e-06, "log_odds_chosen": 0.6325439214706421, "log_odds_ratio": -0.588671863079071, "logits/chosen": -1.0900390148162842, "logits/rejected": -0.900195300579071, "logps/chosen": -0.811328113079071, "logps/rejected": -1.232031226158142, "loss": 1.1125, "nll_loss": 1.074804663658142, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.08109130710363388, "rewards/margins": 0.04205475002527237, "rewards/rejected": -0.12312011420726776, "step": 770 }, { "epoch": 0.059236757167267895, "grad_norm": 1.2104716230086077, "learning_rate": 2.8644594961577314e-06, "log_odds_chosen": 0.779296875, "log_odds_ratio": -0.5453125238418579, "logits/chosen": -0.9945312738418579, "logits/rejected": -0.8685547113418579, "logps/chosen": -0.759570300579071, "logps/rejected": -1.306054711341858, "loss": 1.1244, "nll_loss": 0.9615234136581421, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07593993842601776, "rewards/margins": 0.0547027587890625, "rewards/rejected": -0.13056640326976776, "step": 780 }, { "epoch": 0.05999620277197646, "grad_norm": 1.235464638106656, "learning_rate": 2.84627226785928e-06, "log_odds_chosen": 0.8065185546875, "log_odds_ratio": -0.552734375, "logits/chosen": -1.0576171875, "logits/rejected": -0.968554675579071, "logps/chosen": -0.7392578125, "logps/rejected": -1.303320288658142, "loss": 1.1089, "nll_loss": 1.094335913658142, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07387695461511612, "rewards/margins": 0.056533049792051315, "rewards/rejected": -0.13034668564796448, "step": 790 }, { "epoch": 0.06075564837668502, "grad_norm": 1.3902961472733677, "learning_rate": 2.82842712474619e-06, "log_odds_chosen": 0.749621570110321, "log_odds_ratio": -0.550097644329071, "logits/chosen": -1.067968726158142, "logits/rejected": -0.919726550579071, "logps/chosen": -0.770703136920929, "logps/rejected": -1.294921875, "loss": 1.1083, "nll_loss": 1.0517578125, "rewards/accuracies": 0.65625, "rewards/chosen": -0.07705078274011612, "rewards/margins": 0.05236358568072319, "rewards/rejected": -0.12944336235523224, "step": 800 }, { "epoch": 0.061515093981393584, "grad_norm": 1.3540797666274922, "learning_rate": 2.810913475705226e-06, "log_odds_chosen": 0.7945556640625, "log_odds_ratio": -0.564746081829071, "logits/chosen": -1.1056640148162842, "logits/rejected": -0.906054675579071, "logps/chosen": -0.826171875, "logps/rejected": -1.3859374523162842, "loss": 1.1311, "nll_loss": 1.1025390625, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08259277045726776, "rewards/margins": 0.05601043626666069, "rewards/rejected": -0.138671875, "step": 810 }, { "epoch": 0.06227453958610214, "grad_norm": 1.2938801214836573, "learning_rate": 2.7937211830783128e-06, "log_odds_chosen": 0.906494140625, "log_odds_ratio": -0.5169433355331421, "logits/chosen": -1.066015601158142, "logits/rejected": -0.916796863079071, "logps/chosen": -0.8042968511581421, "logps/rejected": -1.451562523841858, "loss": 1.1219, "nll_loss": 1.093359351158142, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0804443359375, "rewards/margins": 0.06471862643957138, "rewards/rejected": -0.14511719346046448, "step": 820 }, { "epoch": 0.0630339851908107, "grad_norm": 1.2224846803646274, "learning_rate": 2.776840538002493e-06, "log_odds_chosen": 0.699536144733429, "log_odds_ratio": -0.5567382574081421, "logits/chosen": -0.9681640863418579, "logits/rejected": -0.8695312738418579, "logps/chosen": -0.7738281488418579, "logps/rejected": -1.251367211341858, "loss": 1.1013, "nll_loss": 1.0750000476837158, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07736816257238388, "rewards/margins": 0.04777374118566513, "rewards/rejected": -0.1251220703125, "step": 830 }, { "epoch": 0.06379343079551927, "grad_norm": 1.4214473147251157, "learning_rate": 2.7602622373694163e-06, "log_odds_chosen": 0.936938464641571, "log_odds_ratio": -0.4775390625, "logits/chosen": -1.021484375, "logits/rejected": -0.8511718511581421, "logps/chosen": -0.7916015386581421, "logps/rejected": -1.4621093273162842, "loss": 1.1164, "nll_loss": 1.0988280773162842, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07923583686351776, "rewards/margins": 0.0669708251953125, "rewards/rejected": -0.14626464247703552, "step": 840 }, { "epoch": 0.06455287640022783, "grad_norm": 1.1806615926691915, "learning_rate": 2.743977362280141e-06, "log_odds_chosen": 0.7479248046875, "log_odds_ratio": -0.545117199420929, "logits/chosen": -1.039453148841858, "logits/rejected": -0.8744140863418579, "logps/chosen": -0.7769531011581421, "logps/rejected": -1.2957031726837158, "loss": 1.1075, "nll_loss": 1.075781226158142, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.07769775390625, "rewards/margins": 0.05192871019244194, "rewards/rejected": -0.12956543266773224, "step": 850 }, { "epoch": 0.06531232200493639, "grad_norm": 1.2334374136369932, "learning_rate": 2.7279773578818937e-06, "log_odds_chosen": 0.699206531047821, "log_odds_ratio": -0.5643554925918579, "logits/chosen": -1.047460913658142, "logits/rejected": -0.933789074420929, "logps/chosen": -0.7603515386581421, "logps/rejected": -1.254296898841858, "loss": 1.1085, "nll_loss": 1.013671875, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.07595214992761612, "rewards/margins": 0.049224853515625, "rewards/rejected": -0.12528076767921448, "step": 860 }, { "epoch": 0.06607176760964496, "grad_norm": 1.2759225598108017, "learning_rate": 2.7122540144832417e-06, "log_odds_chosen": 0.548876941204071, "log_odds_ratio": -0.6099609136581421, "logits/chosen": -1.060937523841858, "logits/rejected": -0.923828125, "logps/chosen": -0.8509765863418579, "logps/rejected": -1.2322266101837158, "loss": 1.1167, "nll_loss": 1.1115233898162842, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08507080376148224, "rewards/margins": 0.03810729831457138, "rewards/rejected": -0.12324218451976776, "step": 870 }, { "epoch": 0.06683121321435352, "grad_norm": 1.1423060361713626, "learning_rate": 2.696799449852968e-06, "log_odds_chosen": 0.729541003704071, "log_odds_ratio": -0.55810546875, "logits/chosen": -1.001562476158142, "logits/rejected": -0.877734363079071, "logps/chosen": -0.781445324420929, "logps/rejected": -1.29296875, "loss": 1.1077, "nll_loss": 1.0617187023162842, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.07819823920726776, "rewards/margins": 0.05106963962316513, "rewards/rejected": -0.12922362983226776, "step": 880 }, { "epoch": 0.06759065881906208, "grad_norm": 1.2740345643629647, "learning_rate": 2.6816060926159636e-06, "log_odds_chosen": 0.9874511957168579, "log_odds_ratio": -0.45917969942092896, "logits/chosen": -1.149804711341858, "logits/rejected": -0.8968750238418579, "logps/chosen": -0.753125011920929, "logps/rejected": -1.4304687976837158, "loss": 1.1118, "nll_loss": 1.039453148841858, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07541503757238388, "rewards/margins": 0.06775207817554474, "rewards/rejected": -0.14313964545726776, "step": 890 }, { "epoch": 0.06835010442377065, "grad_norm": 1.1808122435102768, "learning_rate": 2.6666666666666664e-06, "log_odds_chosen": 0.8213866949081421, "log_odds_ratio": -0.5967773199081421, "logits/chosen": -1.0498046875, "logits/rejected": -0.9371093511581421, "logps/chosen": -0.7933593988418579, "logps/rejected": -1.405859351158142, "loss": 1.114, "nll_loss": 1.058984398841858, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.079345703125, "rewards/margins": 0.06116180494427681, "rewards/rejected": -0.14055176079273224, "step": 900 }, { "epoch": 0.06910955002847921, "grad_norm": 1.2315047655913471, "learning_rate": 2.6519741765271837e-06, "log_odds_chosen": 0.9344726800918579, "log_odds_ratio": -0.5025390386581421, "logits/chosen": -1.004492163658142, "logits/rejected": -0.8675781488418579, "logps/chosen": -0.769335925579071, "logps/rejected": -1.427343726158142, "loss": 1.0716, "nll_loss": 1.0615234375, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07696533203125, "rewards/margins": 0.06583251804113388, "rewards/rejected": -0.14284667372703552, "step": 910 }, { "epoch": 0.06986899563318777, "grad_norm": 1.2276006068497622, "learning_rate": 2.637521893583148e-06, "log_odds_chosen": 0.8968505859375, "log_odds_ratio": -0.4952148497104645, "logits/chosen": -1.123046875, "logits/rejected": -0.910937488079071, "logps/chosen": -0.8140624761581421, "logps/rejected": -1.425195336341858, "loss": 1.0979, "nll_loss": 1.074804663658142, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08137206733226776, "rewards/margins": 0.06109924241900444, "rewards/rejected": -0.14248046278953552, "step": 920 }, { "epoch": 0.07062844123789634, "grad_norm": 1.172879573683124, "learning_rate": 2.6233033431358115e-06, "log_odds_chosen": 0.6944824457168579, "log_odds_ratio": -0.5762695074081421, "logits/chosen": -1.073828101158142, "logits/rejected": -0.9076172113418579, "logps/chosen": -0.778515636920929, "logps/rejected": -1.283789038658142, "loss": 1.1009, "nll_loss": 1.0632812976837158, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.07786865532398224, "rewards/margins": 0.050580598413944244, "rewards/rejected": -0.12839356064796448, "step": 930 }, { "epoch": 0.0713878868426049, "grad_norm": 1.3113135398297677, "learning_rate": 2.6093122922137685e-06, "log_odds_chosen": 0.7413085699081421, "log_odds_ratio": -0.5615234375, "logits/chosen": -1.110742211341858, "logits/rejected": -0.920703113079071, "logps/chosen": -0.832812488079071, "logps/rejected": -1.371679663658142, "loss": 1.1113, "nll_loss": 1.0978515148162842, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.08332519233226776, "rewards/margins": 0.05401153489947319, "rewards/rejected": -0.13737793266773224, "step": 940 }, { "epoch": 0.07214733244731346, "grad_norm": 1.2407085485756437, "learning_rate": 2.5955427380922006e-06, "log_odds_chosen": 0.8196166753768921, "log_odds_ratio": -0.530078113079071, "logits/chosen": -1.003320336341858, "logits/rejected": -0.896289050579071, "logps/chosen": -0.809765636920929, "logps/rejected": -1.3859374523162842, "loss": 1.1151, "nll_loss": 1.0509765148162842, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08096923679113388, "rewards/margins": 0.05762481689453125, "rewards/rejected": -0.13864746689796448, "step": 950 }, { "epoch": 0.07290677805202202, "grad_norm": 1.203998193871145, "learning_rate": 2.5819888974716113e-06, "log_odds_chosen": 0.592041015625, "log_odds_ratio": -0.612500011920929, "logits/chosen": -1.0623047351837158, "logits/rejected": -0.9710937738418579, "logps/chosen": -0.7904297113418579, "logps/rejected": -1.2041015625, "loss": 1.0931, "nll_loss": 1.029687523841858, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.07901611179113388, "rewards/margins": 0.04140625149011612, "rewards/rejected": -0.1204833984375, "step": 960 }, { "epoch": 0.07366622365673059, "grad_norm": 6.757897834622117, "learning_rate": 2.5686451962717425e-06, "log_odds_chosen": 0.864501953125, "log_odds_ratio": -0.50341796875, "logits/chosen": -1.0207030773162842, "logits/rejected": -0.8603515625, "logps/chosen": -0.781445324420929, "logps/rejected": -1.396484375, "loss": 1.1258, "nll_loss": 1.1105468273162842, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07813720405101776, "rewards/margins": 0.06162109225988388, "rewards/rejected": -0.13962402939796448, "step": 970 }, { "epoch": 0.07442566926143915, "grad_norm": 1.2809388596265894, "learning_rate": 2.5555062599997596e-06, "log_odds_chosen": 0.77978515625, "log_odds_ratio": -0.523242175579071, "logits/chosen": -1.0476562976837158, "logits/rejected": -0.8994140625, "logps/chosen": -0.790234386920929, "logps/rejected": -1.339453101158142, "loss": 1.1283, "nll_loss": 1.105078101158142, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07902832329273224, "rewards/margins": 0.05488281324505806, "rewards/rejected": -0.13388672471046448, "step": 980 }, { "epoch": 0.0751851148661477, "grad_norm": 1.1940766522639505, "learning_rate": 2.5425669046549126e-06, "log_odds_chosen": 0.7021850347518921, "log_odds_ratio": -0.5478515625, "logits/chosen": -1.052148461341858, "logits/rejected": -0.9380859136581421, "logps/chosen": -0.72265625, "logps/rejected": -1.182226538658142, "loss": 1.1049, "nll_loss": 1.069726586341858, "rewards/accuracies": 0.6875, "rewards/chosen": -0.072265625, "rewards/margins": 0.04597320407629013, "rewards/rejected": -0.11831054836511612, "step": 990 }, { "epoch": 0.07594456047085628, "grad_norm": 1.2328658371128554, "learning_rate": 2.5298221281347034e-06, "log_odds_chosen": 0.59521484375, "log_odds_ratio": -0.579394519329071, "logits/chosen": -1.0427734851837158, "logits/rejected": -0.880859375, "logps/chosen": -0.7744140625, "logps/rejected": -1.1896483898162842, "loss": 1.1217, "nll_loss": 1.0568358898162842, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.07749023288488388, "rewards/margins": 0.04156341403722763, "rewards/rejected": -0.11899413913488388, "step": 1000 }, { "epoch": 0.07670400607556484, "grad_norm": 1.328643225162742, "learning_rate": 2.5172671021102103e-06, "log_odds_chosen": 0.828564465045929, "log_odds_ratio": -0.5384765863418579, "logits/chosen": -1.146093726158142, "logits/rejected": -0.9888671636581421, "logps/chosen": -0.755859375, "logps/rejected": -1.3312499523162842, "loss": 1.113, "nll_loss": 0.9869140386581421, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0755615234375, "rewards/margins": 0.05749816820025444, "rewards/rejected": -0.13310547173023224, "step": 1010 }, { "epoch": 0.0774634516802734, "grad_norm": 1.271480546998933, "learning_rate": 2.504897164340598e-06, "log_odds_chosen": 1.061132788658142, "log_odds_ratio": -0.46611326932907104, "logits/chosen": -1.0886719226837158, "logits/rejected": -0.8525390625, "logps/chosen": -0.7646484375, "logps/rejected": -1.485742211341858, "loss": 1.0917, "nll_loss": 1.0285155773162842, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07646484673023224, "rewards/margins": 0.07216797024011612, "rewards/rejected": -0.14873047173023224, "step": 1020 }, { "epoch": 0.07822289728498197, "grad_norm": 1.2566273212101755, "learning_rate": 2.492707811399023e-06, "log_odds_chosen": 0.939038097858429, "log_odds_ratio": -0.49589842557907104, "logits/chosen": -1.055078148841858, "logits/rejected": -0.8775390386581421, "logps/chosen": -0.73291015625, "logps/rejected": -1.422460913658142, "loss": 1.0749, "nll_loss": 1.0007812976837158, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07326660305261612, "rewards/margins": 0.06887207180261612, "rewards/rejected": -0.14218750596046448, "step": 1030 }, { "epoch": 0.07898234288969053, "grad_norm": 1.2297164671618135, "learning_rate": 2.480694691784169e-06, "log_odds_chosen": 0.801074206829071, "log_odds_ratio": -0.51806640625, "logits/chosen": -1.0880858898162842, "logits/rejected": -0.912109375, "logps/chosen": -0.751171886920929, "logps/rejected": -1.3046875, "loss": 1.0874, "nll_loss": 1.076171875, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07514648139476776, "rewards/margins": 0.05530395358800888, "rewards/rejected": -0.13046875596046448, "step": 1040 }, { "epoch": 0.07974178849439909, "grad_norm": 2.2002326720714915, "learning_rate": 2.4688535993934706e-06, "log_odds_chosen": 0.9129883050918579, "log_odds_ratio": -0.5006347894668579, "logits/chosen": -0.9990234375, "logits/rejected": -0.829882800579071, "logps/chosen": -0.7728515863418579, "logps/rejected": -1.409570336341858, "loss": 1.0907, "nll_loss": 1.0302734375, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07731933891773224, "rewards/margins": 0.063720703125, "rewards/rejected": -0.1409912109375, "step": 1050 }, { "epoch": 0.08050123409910766, "grad_norm": 1.2875034093281599, "learning_rate": 2.457180467335805e-06, "log_odds_chosen": 1.032373070716858, "log_odds_ratio": -0.4422851502895355, "logits/chosen": -1.070703148841858, "logits/rejected": -0.8910156488418579, "logps/chosen": -0.727343738079071, "logps/rejected": -1.4269530773162842, "loss": 1.0764, "nll_loss": 0.987500011920929, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07269287109375, "rewards/margins": 0.06989745795726776, "rewards/rejected": -0.142578125, "step": 1060 }, { "epoch": 0.08126067970381622, "grad_norm": 1.2475241172670009, "learning_rate": 2.4456713620629725e-06, "log_odds_chosen": 1.06005859375, "log_odds_ratio": -0.47822266817092896, "logits/chosen": -1.0613281726837158, "logits/rejected": -0.8785156011581421, "logps/chosen": -0.7237304449081421, "logps/rejected": -1.445898413658142, "loss": 1.0779, "nll_loss": 0.968554675579071, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07236327975988388, "rewards/margins": 0.07232742011547089, "rewards/rejected": -0.14470215141773224, "step": 1070 }, { "epoch": 0.08202012530852477, "grad_norm": 1.2770147014060478, "learning_rate": 2.4343224778007378e-06, "log_odds_chosen": 0.9845215082168579, "log_odds_ratio": -0.507763683795929, "logits/chosen": -1.029687523841858, "logits/rejected": -0.868359386920929, "logps/chosen": -0.768359363079071, "logps/rejected": -1.505468726158142, "loss": 1.1122, "nll_loss": 1.0263671875, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07680664211511612, "rewards/margins": 0.07365341484546661, "rewards/rejected": -0.1505126953125, "step": 1080 }, { "epoch": 0.08277957091323333, "grad_norm": 1.4049250885855562, "learning_rate": 2.4231301312615306e-06, "log_odds_chosen": 0.9251953363418579, "log_odds_ratio": -0.49907225370407104, "logits/chosen": -1.0537109375, "logits/rejected": -0.9166015386581421, "logps/chosen": -0.76171875, "logps/rejected": -1.3855469226837158, "loss": 1.0815, "nll_loss": 1.0046875476837158, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07625732570886612, "rewards/margins": 0.06239013746380806, "rewards/rejected": -0.13857421278953552, "step": 1090 }, { "epoch": 0.0835390165179419, "grad_norm": 1.2449165498530383, "learning_rate": 2.412090756622109e-06, "log_odds_chosen": 0.940625011920929, "log_odds_ratio": -0.4959960877895355, "logits/chosen": -1.0869140625, "logits/rejected": -0.8802734613418579, "logps/chosen": -0.8060547113418579, "logps/rejected": -1.463281273841858, "loss": 1.0865, "nll_loss": 1.065039038658142, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08062744140625, "rewards/margins": 0.065704345703125, "rewards/rejected": -0.14633789658546448, "step": 1100 }, { "epoch": 0.08429846212265046, "grad_norm": 1.2203495326278755, "learning_rate": 2.401200900750657e-06, "log_odds_chosen": 1.152197241783142, "log_odds_ratio": -0.463623046875, "logits/chosen": -0.99609375, "logits/rejected": -0.8091796636581421, "logps/chosen": -0.763867199420929, "logps/rejected": -1.5849609375, "loss": 1.1084, "nll_loss": 1.069921851158142, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07642821967601776, "rewards/margins": 0.08193206787109375, "rewards/rejected": -0.15830078721046448, "step": 1110 }, { "epoch": 0.08505790772735902, "grad_norm": 1.2655787737926392, "learning_rate": 2.390457218668787e-06, "log_odds_chosen": 0.8880370855331421, "log_odds_ratio": -0.4989257752895355, "logits/chosen": -1.0886719226837158, "logits/rejected": -0.8876953125, "logps/chosen": -0.736132800579071, "logps/rejected": -1.344335913658142, "loss": 1.0825, "nll_loss": 1.0593750476837158, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07357177883386612, "rewards/margins": 0.06085815280675888, "rewards/rejected": -0.1343994140625, "step": 1120 }, { "epoch": 0.0858173533320676, "grad_norm": 1.1335288654942561, "learning_rate": 2.379856469234918e-06, "log_odds_chosen": 0.7937988042831421, "log_odds_ratio": -0.5357421636581421, "logits/chosen": -1.0324218273162842, "logits/rejected": -0.876171886920929, "logps/chosen": -0.821484386920929, "logps/rejected": -1.344140648841858, "loss": 1.0901, "nll_loss": 1.048242211341858, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.08209228515625, "rewards/margins": 0.052337646484375, "rewards/rejected": -0.1343994140625, "step": 1130 }, { "epoch": 0.08657679893677615, "grad_norm": 1.1443311187977512, "learning_rate": 2.369395511036369e-06, "log_odds_chosen": 0.8692992925643921, "log_odds_ratio": -0.5335937738418579, "logits/chosen": -1.033789038658142, "logits/rejected": -0.9164062738418579, "logps/chosen": -0.7911132574081421, "logps/rejected": -1.3748047351837158, "loss": 1.0836, "nll_loss": 1.0753905773162842, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07912597805261612, "rewards/margins": 0.05836181715130806, "rewards/rejected": -0.13750000298023224, "step": 1140 }, { "epoch": 0.08733624454148471, "grad_norm": 1.4152113995997173, "learning_rate": 2.359071298478354e-06, "log_odds_chosen": 0.743762195110321, "log_odds_ratio": -0.5491698980331421, "logits/chosen": -1.0341796875, "logits/rejected": -0.900390625, "logps/chosen": -0.7406250238418579, "logps/rejected": -1.2619140148162842, "loss": 1.058, "nll_loss": 0.9501953125, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0740966796875, "rewards/margins": 0.05215301364660263, "rewards/rejected": -0.126220703125, "step": 1150 }, { "epoch": 0.08809569014619328, "grad_norm": 1.343084320135623, "learning_rate": 2.3488808780588137e-06, "log_odds_chosen": 0.959582507610321, "log_odds_ratio": -0.505175769329071, "logits/chosen": -1.0236327648162842, "logits/rejected": -0.8804687261581421, "logps/chosen": -0.800000011920929, "logps/rejected": -1.5128905773162842, "loss": 1.105, "nll_loss": 1.067968726158142, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08006592094898224, "rewards/margins": 0.07133178412914276, "rewards/rejected": -0.15126952528953552, "step": 1160 }, { "epoch": 0.08885513575090184, "grad_norm": 1.3246285585416955, "learning_rate": 2.3388213848187446e-06, "log_odds_chosen": 0.953417956829071, "log_odds_ratio": -0.4892578125, "logits/chosen": -1.083398461341858, "logits/rejected": -0.9009765386581421, "logps/chosen": -0.747265636920929, "logps/rejected": -1.407812476158142, "loss": 1.0735, "nll_loss": 1.033789038658142, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07475586235523224, "rewards/margins": 0.06605835258960724, "rewards/rejected": -0.1407470703125, "step": 1170 }, { "epoch": 0.0896145813556104, "grad_norm": 1.3927054455367915, "learning_rate": 2.328890038958328e-06, "log_odds_chosen": 0.898999035358429, "log_odds_ratio": -0.52880859375, "logits/chosen": -1.0437500476837158, "logits/rejected": -0.8617187738418579, "logps/chosen": -0.775585949420929, "logps/rejected": -1.4111328125, "loss": 1.1002, "nll_loss": 1.040429711341858, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.07760009914636612, "rewards/margins": 0.06354675441980362, "rewards/rejected": -0.14108887314796448, "step": 1180 }, { "epoch": 0.09037402696031897, "grad_norm": 1.1629703496269976, "learning_rate": 2.3190841426097937e-06, "log_odds_chosen": 0.769970715045929, "log_odds_ratio": -0.53369140625, "logits/chosen": -1.0009765625, "logits/rejected": -0.9058593511581421, "logps/chosen": -0.745312511920929, "logps/rejected": -1.2628905773162842, "loss": 1.0894, "nll_loss": 0.9888671636581421, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07453612983226776, "rewards/margins": 0.05177612230181694, "rewards/rejected": -0.12624511122703552, "step": 1190 }, { "epoch": 0.09113347256502753, "grad_norm": 1.2552217520608357, "learning_rate": 2.309401076758503e-06, "log_odds_chosen": 0.8551269769668579, "log_odds_ratio": -0.516308605670929, "logits/chosen": -1.119531273841858, "logits/rejected": -0.9117187261581421, "logps/chosen": -0.7652343511581421, "logps/rejected": -1.3624999523162842, "loss": 1.067, "nll_loss": 1.018945336341858, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0765380859375, "rewards/margins": 0.05975951999425888, "rewards/rejected": -0.13629150390625, "step": 1200 }, { "epoch": 0.09189291816973609, "grad_norm": 1.365699703902085, "learning_rate": 2.299838298304276e-06, "log_odds_chosen": 1.141015648841858, "log_odds_ratio": -0.4697265625, "logits/chosen": -1.0242187976837158, "logits/rejected": -0.851757824420929, "logps/chosen": -0.7564452886581421, "logps/rejected": -1.578710913658142, "loss": 1.0668, "nll_loss": 1.0330078601837158, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07563476264476776, "rewards/margins": 0.08226318657398224, "rewards/rejected": -0.15791015326976776, "step": 1210 }, { "epoch": 0.09265236377444465, "grad_norm": 1.2211005592893962, "learning_rate": 2.2903933372554728e-06, "log_odds_chosen": 0.8724120855331421, "log_odds_ratio": -0.524169921875, "logits/chosen": -1.1082031726837158, "logits/rejected": -0.9544922113418579, "logps/chosen": -0.761523425579071, "logps/rejected": -1.3800780773162842, "loss": 1.1021, "nll_loss": 1.045312523841858, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07608642429113388, "rewards/margins": 0.06178588792681694, "rewards/rejected": -0.13791504502296448, "step": 1220 }, { "epoch": 0.09341180937915322, "grad_norm": 1.2524063673259989, "learning_rate": 2.281063794048804e-06, "log_odds_chosen": 0.740234375, "log_odds_ratio": -0.540820300579071, "logits/chosen": -0.993945300579071, "logits/rejected": -0.861523449420929, "logps/chosen": -0.7208007574081421, "logps/rejected": -1.2392578125, "loss": 1.0964, "nll_loss": 1.046875, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07208251953125, "rewards/margins": 0.05193786695599556, "rewards/rejected": -0.12385253608226776, "step": 1230 }, { "epoch": 0.09417125498386178, "grad_norm": 1.1890173868923026, "learning_rate": 2.271847336988259e-06, "log_odds_chosen": 0.8353027105331421, "log_odds_ratio": -0.52685546875, "logits/chosen": -1.045312523841858, "logits/rejected": -0.8935546875, "logps/chosen": -0.821484386920929, "logps/rejected": -1.404882788658142, "loss": 1.09, "nll_loss": 1.0544922351837158, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08212890475988388, "rewards/margins": 0.05834808200597763, "rewards/rejected": -0.14055176079273224, "step": 1240 }, { "epoch": 0.09493070058857034, "grad_norm": 1.4867817830075825, "learning_rate": 2.262741699796952e-06, "log_odds_chosen": 0.728710949420929, "log_odds_ratio": -0.5376952886581421, "logits/chosen": -0.965039074420929, "logits/rejected": -0.8345702886581421, "logps/chosen": -0.7578125, "logps/rejected": -1.2433593273162842, "loss": 1.093, "nll_loss": 1.085351586341858, "rewards/accuracies": 0.75, "rewards/chosen": -0.07584228366613388, "rewards/margins": 0.048553466796875, "rewards/rejected": -0.12434081733226776, "step": 1250 }, { "epoch": 0.09569014619327891, "grad_norm": 1.468901786172847, "learning_rate": 2.253744679276044e-06, "log_odds_chosen": 0.9930664300918579, "log_odds_ratio": -0.4754394590854645, "logits/chosen": -0.982617199420929, "logits/rejected": -0.877148449420929, "logps/chosen": -0.758007824420929, "logps/rejected": -1.4279296398162842, "loss": 1.112, "nll_loss": 1.033203125, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07585449516773224, "rewards/margins": 0.06703491508960724, "rewards/rejected": -0.14289550483226776, "step": 1260 }, { "epoch": 0.09644959179798747, "grad_norm": 1.3119535048074145, "learning_rate": 2.244854133065255e-06, "log_odds_chosen": 1.1066405773162842, "log_odds_ratio": -0.45722657442092896, "logits/chosen": -1.1162109375, "logits/rejected": -0.862500011920929, "logps/chosen": -0.7250000238418579, "logps/rejected": -1.4792969226837158, "loss": 1.08, "nll_loss": 1.0119140148162842, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07254638522863388, "rewards/margins": 0.0753173828125, "rewards/rejected": -0.1480712890625, "step": 1270 }, { "epoch": 0.09720903740269603, "grad_norm": 1.3342326134552545, "learning_rate": 2.2360679774997895e-06, "log_odds_chosen": 1.015625, "log_odds_ratio": -0.47099608182907104, "logits/chosen": -1.1212890148162842, "logits/rejected": -0.900195300579071, "logps/chosen": -0.7798827886581421, "logps/rejected": -1.4773437976837158, "loss": 1.0793, "nll_loss": 1.070898413658142, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.0780029296875, "rewards/margins": 0.06964111328125, "rewards/rejected": -0.147705078125, "step": 1280 }, { "epoch": 0.0979684830074046, "grad_norm": 1.3877919411637134, "learning_rate": 2.2273841855588183e-06, "log_odds_chosen": 0.919677734375, "log_odds_ratio": -0.541943371295929, "logits/chosen": -1.01171875, "logits/rejected": -0.8720703125, "logps/chosen": -0.782421886920929, "logps/rejected": -1.422460913658142, "loss": 1.0716, "nll_loss": 1.0119140148162842, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07832030951976776, "rewards/margins": 0.06386413425207138, "rewards/rejected": -0.14216308295726776, "step": 1290 }, { "epoch": 0.09872792861211316, "grad_norm": 1.3152014349068926, "learning_rate": 2.2188007849009167e-06, "log_odds_chosen": 1.043066382408142, "log_odds_ratio": -0.4751953184604645, "logits/chosen": -1.0544922351837158, "logits/rejected": -0.8720703125, "logps/chosen": -0.7337890863418579, "logps/rejected": -1.4578125476837158, "loss": 1.0574, "nll_loss": 1.0246093273162842, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07333984225988388, "rewards/margins": 0.07246704399585724, "rewards/rejected": -0.14580078423023224, "step": 1300 }, { "epoch": 0.09948737421682172, "grad_norm": 1.5398725759037881, "learning_rate": 2.2103158559821502e-06, "log_odds_chosen": 1.2927734851837158, "log_odds_ratio": -0.3848632872104645, "logits/chosen": -0.991406261920929, "logits/rejected": -0.8701171875, "logps/chosen": -0.7085937261581421, "logps/rejected": -1.5910155773162842, "loss": 1.0494, "nll_loss": 0.9931640625, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07086181640625, "rewards/margins": 0.08814696967601776, "rewards/rejected": -0.1591796875, "step": 1310 }, { "epoch": 0.10024681982153029, "grad_norm": 1.221941769186949, "learning_rate": 2.2019275302527213e-06, "log_odds_chosen": 1.106835961341858, "log_odds_ratio": -0.4424804747104645, "logits/chosen": -1.0441405773162842, "logits/rejected": -0.8714843988418579, "logps/chosen": -0.744335949420929, "logps/rejected": -1.5050780773162842, "loss": 1.0945, "nll_loss": 1.085546851158142, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07443847507238388, "rewards/margins": 0.07620849460363388, "rewards/rejected": -0.15068359673023224, "step": 1320 }, { "epoch": 0.10100626542623885, "grad_norm": 1.3117403279491364, "learning_rate": 2.193633988428327e-06, "log_odds_chosen": 0.9405273199081421, "log_odds_ratio": -0.4999023377895355, "logits/chosen": -1.1298828125, "logits/rejected": -0.903124988079071, "logps/chosen": -0.767773449420929, "logps/rejected": -1.4070312976837158, "loss": 1.0766, "nll_loss": 1.035546898841858, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07679443061351776, "rewards/margins": 0.06395263969898224, "rewards/rejected": -0.14072266221046448, "step": 1330 }, { "epoch": 0.1017657110309474, "grad_norm": 1.4108194598337718, "learning_rate": 2.185433458832612e-06, "log_odds_chosen": 1.0618164539337158, "log_odds_ratio": -0.503710925579071, "logits/chosen": -0.9765625, "logits/rejected": -0.86328125, "logps/chosen": -0.75, "logps/rejected": -1.521093726158142, "loss": 1.0607, "nll_loss": 1.0441405773162842, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07504882663488388, "rewards/margins": 0.07715453952550888, "rewards/rejected": -0.15217284858226776, "step": 1340 }, { "epoch": 0.10252515663565596, "grad_norm": 1.2584144983829033, "learning_rate": 2.177324215807269e-06, "log_odds_chosen": 0.9881836175918579, "log_odds_ratio": -0.4957519471645355, "logits/chosen": -1.0341796875, "logits/rejected": -0.9029296636581421, "logps/chosen": -0.7373046875, "logps/rejected": -1.4296875, "loss": 1.0774, "nll_loss": 1.063085913658142, "rewards/accuracies": 0.75, "rewards/chosen": -0.07381591945886612, "rewards/margins": 0.06915283203125, "rewards/rejected": -0.14296874403953552, "step": 1350 }, { "epoch": 0.10328460224036454, "grad_norm": 1.781065499796602, "learning_rate": 2.1693045781865616e-06, "log_odds_chosen": 1.0832030773162842, "log_odds_ratio": -0.46562498807907104, "logits/chosen": -1.0974609851837158, "logits/rejected": -0.9007812738418579, "logps/chosen": -0.733593761920929, "logps/rejected": -1.480859398841858, "loss": 1.0631, "nll_loss": 1.037695288658142, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07330322265625, "rewards/margins": 0.07468261569738388, "rewards/rejected": -0.14794921875, "step": 1360 }, { "epoch": 0.1040440478450731, "grad_norm": 1.440315572606993, "learning_rate": 2.1613729078331965e-06, "log_odds_chosen": 1.155664086341858, "log_odds_ratio": -0.4378906190395355, "logits/chosen": -1.0380859375, "logits/rejected": -0.8599609136581421, "logps/chosen": -0.7237304449081421, "logps/rejected": -1.530859351158142, "loss": 1.0708, "nll_loss": 0.9957031011581421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07235107570886612, "rewards/margins": 0.08076171576976776, "rewards/rejected": -0.15314941108226776, "step": 1370 }, { "epoch": 0.10480349344978165, "grad_norm": 1.354532177090641, "learning_rate": 2.1535276082326617e-06, "log_odds_chosen": 0.9754883050918579, "log_odds_ratio": -0.5003906488418579, "logits/chosen": -1.0265624523162842, "logits/rejected": -0.870312511920929, "logps/chosen": -0.7232421636581421, "logps/rejected": -1.3962891101837158, "loss": 1.0579, "nll_loss": 1.012304663658142, "rewards/accuracies": 0.75, "rewards/chosen": -0.07233886420726776, "rewards/margins": 0.06735839694738388, "rewards/rejected": -0.13967284560203552, "step": 1380 }, { "epoch": 0.10556293905449023, "grad_norm": 1.323996975716089, "learning_rate": 2.14576712314328e-06, "log_odds_chosen": 0.99560546875, "log_odds_ratio": -0.5396484136581421, "logits/chosen": -1.0417969226837158, "logits/rejected": -0.913281261920929, "logps/chosen": -0.7841796875, "logps/rejected": -1.51171875, "loss": 1.0614, "nll_loss": 1.025976538658142, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.07840576022863388, "rewards/margins": 0.07285919040441513, "rewards/rejected": -0.15119628608226776, "step": 1390 }, { "epoch": 0.10632238465919878, "grad_norm": 1.3636953688348927, "learning_rate": 2.138089935299395e-06, "log_odds_chosen": 0.8795410394668579, "log_odds_ratio": -0.53857421875, "logits/chosen": -1.0623047351837158, "logits/rejected": -0.9156249761581421, "logps/chosen": -0.7904297113418579, "logps/rejected": -1.379492163658142, "loss": 1.0719, "nll_loss": 0.9683593511581421, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07904052734375, "rewards/margins": 0.05877075344324112, "rewards/rejected": -0.13776855170726776, "step": 1400 }, { "epoch": 0.10708183026390734, "grad_norm": 1.3474488669922295, "learning_rate": 2.1304945651652297e-06, "log_odds_chosen": 1.001001000404358, "log_odds_ratio": -0.506396472454071, "logits/chosen": -1.039648413658142, "logits/rejected": -0.884765625, "logps/chosen": -0.7974609136581421, "logps/rejected": -1.532617211341858, "loss": 1.0758, "nll_loss": 1.0720703601837158, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07976074516773224, "rewards/margins": 0.07355956733226776, "rewards/rejected": -0.15336914360523224, "step": 1410 }, { "epoch": 0.10784127586861592, "grad_norm": 1.2535361290211304, "learning_rate": 2.122979569737101e-06, "log_odds_chosen": 0.7390991449356079, "log_odds_ratio": -0.575878918170929, "logits/chosen": -1.065820336341858, "logits/rejected": -0.891796886920929, "logps/chosen": -0.7826172113418579, "logps/rejected": -1.302734375, "loss": 1.0939, "nll_loss": 0.9853515625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07828368991613388, "rewards/margins": 0.05184783786535263, "rewards/rejected": -0.13010254502296448, "step": 1420 }, { "epoch": 0.10860072147332447, "grad_norm": 1.2415044853436998, "learning_rate": 2.11554354139178e-06, "log_odds_chosen": 1.087548851966858, "log_odds_ratio": -0.4874511659145355, "logits/chosen": -1.0349609851837158, "logits/rejected": -0.860156238079071, "logps/chosen": -0.744433581829071, "logps/rejected": -1.517187476158142, "loss": 1.0416, "nll_loss": 1.007226586341858, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07440185546875, "rewards/margins": 0.07728271186351776, "rewards/rejected": -0.15163573622703552, "step": 1430 }, { "epoch": 0.10936016707803303, "grad_norm": 1.2411364194438226, "learning_rate": 2.1081851067789196e-06, "log_odds_chosen": 1.038720726966858, "log_odds_ratio": -0.4839843809604645, "logits/chosen": -0.9873046875, "logits/rejected": -0.8525390625, "logps/chosen": -0.7630859613418579, "logps/rejected": -1.521875023841858, "loss": 1.057, "nll_loss": 0.9873046875, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07631836086511612, "rewards/margins": 0.07570800930261612, "rewards/rejected": -0.15207520127296448, "step": 1440 }, { "epoch": 0.1101196126827416, "grad_norm": 1.211081503737846, "learning_rate": 2.1009029257555606e-06, "log_odds_chosen": 1.1140625476837158, "log_odds_ratio": -0.46513670682907104, "logits/chosen": -1.0740234851837158, "logits/rejected": -0.8794921636581421, "logps/chosen": -0.7118164300918579, "logps/rejected": -1.4513671398162842, "loss": 1.0665, "nll_loss": 0.9703124761581421, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07117919623851776, "rewards/margins": 0.07398223876953125, "rewards/rejected": -0.14531250298023224, "step": 1450 }, { "epoch": 0.11087905828745016, "grad_norm": 1.2548440039523137, "learning_rate": 2.0936956903608545e-06, "log_odds_chosen": 0.990039050579071, "log_odds_ratio": -0.5025879144668579, "logits/chosen": -1.071679711341858, "logits/rejected": -0.914843738079071, "logps/chosen": -0.723828136920929, "logps/rejected": -1.391210913658142, "loss": 1.0618, "nll_loss": 0.9896484613418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07233886420726776, "rewards/margins": 0.066741943359375, "rewards/rejected": -0.13911132514476776, "step": 1460 }, { "epoch": 0.11163850389215872, "grad_norm": 1.2165729056550976, "learning_rate": 2.0865621238292046e-06, "log_odds_chosen": 1.140722632408142, "log_odds_ratio": -0.45195311307907104, "logits/chosen": -1.1611328125, "logits/rejected": -0.967578113079071, "logps/chosen": -0.721875011920929, "logps/rejected": -1.538671851158142, "loss": 1.0614, "nll_loss": 1.0236327648162842, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07215575873851776, "rewards/margins": 0.081817626953125, "rewards/rejected": -0.15402832627296448, "step": 1470 }, { "epoch": 0.11239794949686728, "grad_norm": 1.2397020628888196, "learning_rate": 2.079500979640145e-06, "log_odds_chosen": 0.7727295160293579, "log_odds_ratio": -0.552929699420929, "logits/chosen": -1.0720703601837158, "logits/rejected": -0.9175781011581421, "logps/chosen": -0.793749988079071, "logps/rejected": -1.335546851158142, "loss": 1.0854, "nll_loss": 1.109960913658142, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.07935790717601776, "rewards/margins": 0.05418701097369194, "rewards/rejected": -0.133544921875, "step": 1480 }, { "epoch": 0.11315739510157585, "grad_norm": 1.27579562820246, "learning_rate": 2.072511040603359e-06, "log_odds_chosen": 1.2195312976837158, "log_odds_ratio": -0.40800780057907104, "logits/chosen": -1.114648461341858, "logits/rejected": -0.903515636920929, "logps/chosen": -0.738085925579071, "logps/rejected": -1.629296898841858, "loss": 1.0627, "nll_loss": 0.9974609613418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07380370795726776, "rewards/margins": 0.08922119438648224, "rewards/rejected": -0.16306152939796448, "step": 1490 }, { "epoch": 0.11391684070628441, "grad_norm": 1.232946568201911, "learning_rate": 2.065591117977289e-06, "log_odds_chosen": 0.93798828125, "log_odds_ratio": -0.49467772245407104, "logits/chosen": -1.1025390625, "logits/rejected": -0.945507824420929, "logps/chosen": -0.7837890386581421, "logps/rejected": -1.437890648841858, "loss": 1.0712, "nll_loss": 0.9839843511581421, "rewards/accuracies": 0.75, "rewards/chosen": -0.07833252102136612, "rewards/margins": 0.06526489555835724, "rewards/rejected": -0.14365234971046448, "step": 1500 }, { "epoch": 0.11467628631099297, "grad_norm": 1.3823867901051201, "learning_rate": 2.058740050619915e-06, "log_odds_chosen": 1.0791747570037842, "log_odds_ratio": -0.4610351622104645, "logits/chosen": -1.0734374523162842, "logits/rejected": -0.8861328363418579, "logps/chosen": -0.7730468511581421, "logps/rejected": -1.5203125476837158, "loss": 1.0967, "nll_loss": 1.106835961341858, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07729492336511612, "rewards/margins": 0.07477875053882599, "rewards/rejected": -0.1519775390625, "step": 1510 }, { "epoch": 0.11543573191570154, "grad_norm": 1.2762947266436673, "learning_rate": 2.0519567041703083e-06, "log_odds_chosen": 1.098388671875, "log_odds_ratio": -0.46074217557907104, "logits/chosen": -1.114843726158142, "logits/rejected": -0.908203125, "logps/chosen": -0.726757824420929, "logps/rejected": -1.5128905773162842, "loss": 1.0634, "nll_loss": 1.009667992591858, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07266845554113388, "rewards/margins": 0.07855834811925888, "rewards/rejected": -0.15134277939796448, "step": 1520 }, { "epoch": 0.1161951775204101, "grad_norm": 1.2904647877767499, "learning_rate": 2.0452399702596544e-06, "log_odds_chosen": 0.9715820550918579, "log_odds_ratio": -0.4927734434604645, "logits/chosen": -1.0652344226837158, "logits/rejected": -0.908398449420929, "logps/chosen": -0.74462890625, "logps/rejected": -1.4482421875, "loss": 1.0518, "nll_loss": 1.001367211341858, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.074462890625, "rewards/margins": 0.07039184868335724, "rewards/rejected": -0.14484862983226776, "step": 1530 }, { "epoch": 0.11695462312511866, "grad_norm": 1.7526745738601224, "learning_rate": 2.0385887657505017e-06, "log_odds_chosen": 0.928051769733429, "log_odds_ratio": -0.519824206829071, "logits/chosen": -1.079492211341858, "logits/rejected": -0.9033203125, "logps/chosen": -0.7757812738418579, "logps/rejected": -1.4474608898162842, "loss": 1.0421, "nll_loss": 0.9775390625, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07762451469898224, "rewards/margins": 0.06719360500574112, "rewards/rejected": -0.14472655951976776, "step": 1540 }, { "epoch": 0.11771406872982723, "grad_norm": 1.2611959181970236, "learning_rate": 2.032002032003048e-06, "log_odds_chosen": 0.92828369140625, "log_odds_ratio": -0.48320311307907104, "logits/chosen": -1.0431640148162842, "logits/rejected": -0.91015625, "logps/chosen": -0.700976550579071, "logps/rejected": -1.3595702648162842, "loss": 1.0685, "nll_loss": 1.004296898841858, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07008056342601776, "rewards/margins": 0.06577148288488388, "rewards/rejected": -0.13591308891773224, "step": 1550 }, { "epoch": 0.11847351433453579, "grad_norm": 1.357984067045838, "learning_rate": 2.025478734167333e-06, "log_odds_chosen": 0.981738269329071, "log_odds_ratio": -0.4933105409145355, "logits/chosen": -1.0369141101837158, "logits/rejected": -0.8980468511581421, "logps/chosen": -0.7328125238418579, "logps/rejected": -1.4070312976837158, "loss": 1.0528, "nll_loss": 1.0265624523162842, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07331542670726776, "rewards/margins": 0.06741943210363388, "rewards/rejected": -0.14069823920726776, "step": 1560 }, { "epoch": 0.11923295993924435, "grad_norm": 1.2679986443915143, "learning_rate": 2.0190178605002747e-06, "log_odds_chosen": 0.9727538824081421, "log_odds_ratio": -0.506152331829071, "logits/chosen": -1.0666015148162842, "logits/rejected": -0.912890613079071, "logps/chosen": -0.8070312738418579, "logps/rejected": -1.4904296398162842, "loss": 1.0836, "nll_loss": 1.0662109851837158, "rewards/accuracies": 0.75, "rewards/chosen": -0.08072509616613388, "rewards/margins": 0.06829528510570526, "rewards/rejected": -0.14895018935203552, "step": 1570 }, { "epoch": 0.11999240554395292, "grad_norm": 1.2914530876497048, "learning_rate": 2.0126184217065104e-06, "log_odds_chosen": 0.7563110589981079, "log_odds_ratio": -0.546875, "logits/chosen": -1.083593726158142, "logits/rejected": -0.964062511920929, "logps/chosen": -0.819531261920929, "logps/rejected": -1.337890625, "loss": 1.0929, "nll_loss": 1.047460913658142, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08203125, "rewards/margins": 0.05179748684167862, "rewards/rejected": -0.13381347060203552, "step": 1580 }, { "epoch": 0.12075185114866148, "grad_norm": 1.320687551380879, "learning_rate": 2.0062794503020765e-06, "log_odds_chosen": 1.0807616710662842, "log_odds_ratio": -0.44970703125, "logits/chosen": -1.080078125, "logits/rejected": -0.8687499761581421, "logps/chosen": -0.68603515625, "logps/rejected": -1.441796898841858, "loss": 1.0338, "nll_loss": 0.9560546875, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06866455078125, "rewards/margins": 0.07553710788488388, "rewards/rejected": -0.14423827826976776, "step": 1590 }, { "epoch": 0.12151129675337004, "grad_norm": 1.307281494217591, "learning_rate": 2e-06, "log_odds_chosen": 0.940625011920929, "log_odds_ratio": -0.49199217557907104, "logits/chosen": -1.1404297351837158, "logits/rejected": -0.9624999761581421, "logps/chosen": -0.8050781488418579, "logps/rejected": -1.4513671398162842, "loss": 1.0685, "nll_loss": 1.034570336341858, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.08050537109375, "rewards/margins": 0.06459198147058487, "rewards/rejected": -0.14516600966453552, "step": 1600 }, { "epoch": 0.1222707423580786, "grad_norm": 1.0853545042060448, "learning_rate": 1.993779145116907e-06, "log_odds_chosen": 1.035888671875, "log_odds_ratio": -0.4886718690395355, "logits/chosen": -1.0583984851837158, "logits/rejected": -0.883593738079071, "logps/chosen": -0.762988269329071, "logps/rejected": -1.491601586341858, "loss": 1.0651, "nll_loss": 1.061914086341858, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07635498046875, "rewards/margins": 0.0728759765625, "rewards/rejected": -0.14909668266773224, "step": 1610 }, { "epoch": 0.12303018796278717, "grad_norm": 1.4763029045220466, "learning_rate": 1.987615979999813e-06, "log_odds_chosen": 1.191308617591858, "log_odds_ratio": -0.4716796875, "logits/chosen": -1.0498046875, "logits/rejected": -0.892773449420929, "logps/chosen": -0.8033202886581421, "logps/rejected": -1.6886718273162842, "loss": 1.0642, "nll_loss": 1.052343726158142, "rewards/accuracies": 0.71875, "rewards/chosen": -0.080322265625, "rewards/margins": 0.08850707858800888, "rewards/rejected": -0.16879883408546448, "step": 1620 }, { "epoch": 0.12378963356749573, "grad_norm": 1.1831469452606171, "learning_rate": 1.9815096184722797e-06, "log_odds_chosen": 1.097558617591858, "log_odds_ratio": -0.47895509004592896, "logits/chosen": -1.083593726158142, "logits/rejected": -0.9429687261581421, "logps/chosen": -0.773632824420929, "logps/rejected": -1.546875, "loss": 1.0535, "nll_loss": 0.9878906011581421, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07749023288488388, "rewards/margins": 0.07725143432617188, "rewards/rejected": -0.15488281846046448, "step": 1630 }, { "epoch": 0.12454907917220429, "grad_norm": 1.2794088100809082, "learning_rate": 1.9754591932991793e-06, "log_odds_chosen": 0.934374988079071, "log_odds_ratio": -0.4931640625, "logits/chosen": -1.043554663658142, "logits/rejected": -0.893750011920929, "logps/chosen": -0.752148449420929, "logps/rejected": -1.390625, "loss": 1.0507, "nll_loss": 1.036523461341858, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07530517876148224, "rewards/margins": 0.06391601264476776, "rewards/rejected": -0.13911132514476776, "step": 1640 }, { "epoch": 0.12530852477691284, "grad_norm": 1.188181648425747, "learning_rate": 1.9694638556693235e-06, "log_odds_chosen": 0.9251953363418579, "log_odds_ratio": -0.4774414002895355, "logits/chosen": -1.1164062023162842, "logits/rejected": -0.9632812738418579, "logps/chosen": -0.6917968988418579, "logps/rejected": -1.3292968273162842, "loss": 1.0667, "nll_loss": 0.955859363079071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06917724758386612, "rewards/margins": 0.06374511867761612, "rewards/rejected": -0.13300780951976776, "step": 1650 }, { "epoch": 0.1260679703816214, "grad_norm": 1.2533980423478062, "learning_rate": 1.963522774695264e-06, "log_odds_chosen": 1.01904296875, "log_odds_ratio": -0.5342773199081421, "logits/chosen": -1.076757788658142, "logits/rejected": -0.8949218988418579, "logps/chosen": -0.745898425579071, "logps/rejected": -1.455664038658142, "loss": 1.0229, "nll_loss": 0.982617199420929, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07464599609375, "rewards/margins": 0.07095947116613388, "rewards/rejected": -0.14553222060203552, "step": 1660 }, { "epoch": 0.12682741598633, "grad_norm": 1.190761475391612, "learning_rate": 1.9576351369295853e-06, "log_odds_chosen": 0.8369140625, "log_odds_ratio": -0.506787121295929, "logits/chosen": -1.115625023841858, "logits/rejected": -0.950976550579071, "logps/chosen": -0.7376953363418579, "logps/rejected": -1.2978515625, "loss": 1.0583, "nll_loss": 0.9857422113418579, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07370605319738388, "rewards/margins": 0.05601654201745987, "rewards/rejected": -0.1298828125, "step": 1670 }, { "epoch": 0.12758686159103855, "grad_norm": 1.2242022478309555, "learning_rate": 1.951800145897066e-06, "log_odds_chosen": 1.0771484375, "log_odds_ratio": -0.4736328125, "logits/chosen": -1.072265625, "logits/rejected": -0.9068359136581421, "logps/chosen": -0.7447265386581421, "logps/rejected": -1.5109374523162842, "loss": 1.062, "nll_loss": 0.9644531011581421, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07449951022863388, "rewards/margins": 0.07666168361902237, "rewards/rejected": -0.15109863877296448, "step": 1680 }, { "epoch": 0.1283463071957471, "grad_norm": 5.056235717527065, "learning_rate": 1.9460170216420797e-06, "log_odds_chosen": 0.963183581829071, "log_odds_ratio": -0.5045410394668579, "logits/chosen": -1.084570288658142, "logits/rejected": -0.9453125, "logps/chosen": -0.72265625, "logps/rejected": -1.3748047351837158, "loss": 1.0556, "nll_loss": 0.9583984613418579, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07219238579273224, "rewards/margins": 0.06519164890050888, "rewards/rejected": -0.13747557997703552, "step": 1690 }, { "epoch": 0.12910575280045566, "grad_norm": 1.4721198466662053, "learning_rate": 1.9402850002906637e-06, "log_odds_chosen": 1.386328101158142, "log_odds_ratio": -0.42045897245407104, "logits/chosen": -1.131250023841858, "logits/rejected": -0.9134765863418579, "logps/chosen": -0.716015636920929, "logps/rejected": -1.734765648841858, "loss": 1.0539, "nll_loss": 0.9761718511581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07159423828125, "rewards/margins": 0.10196533054113388, "rewards/rejected": -0.17348632216453552, "step": 1700 }, { "epoch": 0.12986519840516422, "grad_norm": 1.414534011652381, "learning_rate": 1.9346033336266974e-06, "log_odds_chosen": 1.2606322765350342, "log_odds_ratio": -0.46049803495407104, "logits/chosen": -1.132421851158142, "logits/rejected": -0.919726550579071, "logps/chosen": -0.770703136920929, "logps/rejected": -1.666406273841858, "loss": 1.0551, "nll_loss": 1.032617211341858, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07700195163488388, "rewards/margins": 0.089569091796875, "rewards/rejected": -0.16665038466453552, "step": 1710 }, { "epoch": 0.13062464400987278, "grad_norm": 1.2769933553924808, "learning_rate": 1.9289712886816486e-06, "log_odds_chosen": 1.0386230945587158, "log_odds_ratio": -0.4837890565395355, "logits/chosen": -1.191992163658142, "logits/rejected": -0.947460949420929, "logps/chosen": -0.728320300579071, "logps/rejected": -1.409765601158142, "loss": 1.0692, "nll_loss": 1.012109398841858, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07283935695886612, "rewards/margins": 0.06811676174402237, "rewards/rejected": -0.14113768935203552, "step": 1720 }, { "epoch": 0.13138408961458137, "grad_norm": 1.2393540972836636, "learning_rate": 1.92338814733738e-06, "log_odds_chosen": 1.1072266101837158, "log_odds_ratio": -0.49370115995407104, "logits/chosen": -1.173242211341858, "logits/rejected": -0.9476562738418579, "logps/chosen": -0.766406238079071, "logps/rejected": -1.560937523841858, "loss": 1.0494, "nll_loss": 0.9595702886581421, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07672119140625, "rewards/margins": 0.07949218899011612, "rewards/rejected": -0.15610352158546448, "step": 1730 }, { "epoch": 0.13214353521928993, "grad_norm": 1.2594151981819466, "learning_rate": 1.9178532059415367e-06, "log_odds_chosen": 0.9674072265625, "log_odds_ratio": -0.5182129144668579, "logits/chosen": -1.0431640148162842, "logits/rejected": -0.917187511920929, "logps/chosen": -0.73779296875, "logps/rejected": -1.414453148841858, "loss": 1.0795, "nll_loss": 1.054296851158142, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07375488430261612, "rewards/margins": 0.06772308051586151, "rewards/rejected": -0.14138182997703552, "step": 1740 }, { "epoch": 0.13290298082399848, "grad_norm": 1.1836633572672954, "learning_rate": 1.9123657749350298e-06, "log_odds_chosen": 1.07861328125, "log_odds_ratio": -0.4427246153354645, "logits/chosen": -1.142187476158142, "logits/rejected": -0.9248046875, "logps/chosen": -0.7298828363418579, "logps/rejected": -1.479882836341858, "loss": 1.0469, "nll_loss": 1.0076172351837158, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07291259616613388, "rewards/margins": 0.07512207329273224, "rewards/rejected": -0.14799804985523224, "step": 1750 }, { "epoch": 0.13366242642870704, "grad_norm": 1.3431817981962466, "learning_rate": 1.9069251784911844e-06, "log_odds_chosen": 0.9281250238418579, "log_odds_ratio": -0.4989257752895355, "logits/chosen": -1.139257788658142, "logits/rejected": -0.904101550579071, "logps/chosen": -0.760546863079071, "logps/rejected": -1.4015624523162842, "loss": 1.049, "nll_loss": 0.9913085699081421, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07613525539636612, "rewards/margins": 0.06413574516773224, "rewards/rejected": -0.1402587890625, "step": 1760 }, { "epoch": 0.1344218720334156, "grad_norm": 1.3848227272242595, "learning_rate": 1.9015307541661132e-06, "log_odds_chosen": 0.8980712890625, "log_odds_ratio": -0.517578125, "logits/chosen": -1.031640648841858, "logits/rejected": -0.9320312738418579, "logps/chosen": -0.7757812738418579, "logps/rejected": -1.396484375, "loss": 1.0563, "nll_loss": 1.042578101158142, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07752685248851776, "rewards/margins": 0.06197052076458931, "rewards/rejected": -0.13955077528953552, "step": 1770 }, { "epoch": 0.13518131763812416, "grad_norm": 1.3873240073043387, "learning_rate": 1.8961818525599089e-06, "log_odds_chosen": 0.825878918170929, "log_odds_ratio": -0.5110839605331421, "logits/chosen": -1.165429711341858, "logits/rejected": -0.943359375, "logps/chosen": -0.7607421875, "logps/rejected": -1.3078124523162842, "loss": 1.0678, "nll_loss": 1.0027344226837158, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07609863579273224, "rewards/margins": 0.05476074293255806, "rewards/rejected": -0.13083496689796448, "step": 1780 }, { "epoch": 0.13594076324283272, "grad_norm": 1.3673153653847987, "learning_rate": 1.890877836988262e-06, "log_odds_chosen": 1.073339819908142, "log_odds_ratio": -0.4710937440395355, "logits/chosen": -1.1251952648162842, "logits/rejected": -0.9449218511581421, "logps/chosen": -0.670117199420929, "logps/rejected": -1.400390625, "loss": 1.0437, "nll_loss": 0.9638671875, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.06693115085363388, "rewards/margins": 0.07293701171875, "rewards/rejected": -0.13991698622703552, "step": 1790 }, { "epoch": 0.1367002088475413, "grad_norm": 1.2926990363141364, "learning_rate": 1.8856180831641269e-06, "log_odds_chosen": 1.0564453601837158, "log_odds_ratio": -0.45556640625, "logits/chosen": -1.0203125476837158, "logits/rejected": -0.844531238079071, "logps/chosen": -0.7427734136581421, "logps/rejected": -1.4718749523162842, "loss": 1.0348, "nll_loss": 0.9740234613418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07421875, "rewards/margins": 0.073089599609375, "rewards/rejected": -0.1473388671875, "step": 1800 }, { "epoch": 0.13745965445224986, "grad_norm": 1.779941311955198, "learning_rate": 1.8804019788890737e-06, "log_odds_chosen": 1.16845703125, "log_odds_ratio": -0.44677734375, "logits/chosen": -1.0703125, "logits/rejected": -0.916210949420929, "logps/chosen": -0.7298828363418579, "logps/rejected": -1.5792968273162842, "loss": 1.0297, "nll_loss": 1.0046875476837158, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.072998046875, "rewards/margins": 0.08490295708179474, "rewards/rejected": -0.15788574516773224, "step": 1810 }, { "epoch": 0.13821910005695842, "grad_norm": 1.1883417261634077, "learning_rate": 1.8752289237539816e-06, "log_odds_chosen": 0.725024402141571, "log_odds_ratio": -0.5721679925918579, "logits/chosen": -1.064062476158142, "logits/rejected": -0.9326171875, "logps/chosen": -0.7568359375, "logps/rejected": -1.265039086341858, "loss": 1.0461, "nll_loss": 0.981249988079071, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07572021335363388, "rewards/margins": 0.05075225979089737, "rewards/rejected": -0.12653808295726776, "step": 1820 }, { "epoch": 0.13897854566166698, "grad_norm": 1.1696294554357165, "learning_rate": 1.8700983288487376e-06, "log_odds_chosen": 0.9774414300918579, "log_odds_ratio": -0.49638670682907104, "logits/chosen": -1.022851586341858, "logits/rejected": -0.8999999761581421, "logps/chosen": -0.763476550579071, "logps/rejected": -1.447265625, "loss": 1.0574, "nll_loss": 1.0320312976837158, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07628174126148224, "rewards/margins": 0.06836853176355362, "rewards/rejected": -0.14479979872703552, "step": 1830 }, { "epoch": 0.13973799126637554, "grad_norm": 1.1745750858256916, "learning_rate": 1.8650096164806275e-06, "log_odds_chosen": 1.0646240711212158, "log_odds_ratio": -0.4706054627895355, "logits/chosen": -1.0382812023162842, "logits/rejected": -0.8890625238418579, "logps/chosen": -0.7481445074081421, "logps/rejected": -1.4775390625, "loss": 1.0572, "nll_loss": 0.9837890863418579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07479248195886612, "rewards/margins": 0.07286681979894638, "rewards/rejected": -0.14777831733226776, "step": 1840 }, { "epoch": 0.1404974368710841, "grad_norm": 1.5296106172783739, "learning_rate": 1.8599622199011084e-06, "log_odds_chosen": 0.969042956829071, "log_odds_ratio": -0.48271483182907104, "logits/chosen": -1.1326172351837158, "logits/rejected": -0.941210925579071, "logps/chosen": -0.728320300579071, "logps/rejected": -1.391210913658142, "loss": 1.0394, "nll_loss": 0.9058593511581421, "rewards/accuracies": 0.75, "rewards/chosen": -0.07282714545726776, "rewards/margins": 0.066314697265625, "rewards/rejected": -0.13911132514476776, "step": 1850 }, { "epoch": 0.14125688247579268, "grad_norm": 1.5599352267722284, "learning_rate": 1.854955583040673e-06, "log_odds_chosen": 0.9144531488418579, "log_odds_ratio": -0.524609386920929, "logits/chosen": -1.1330077648162842, "logits/rejected": -0.935742199420929, "logps/chosen": -0.8060547113418579, "logps/rejected": -1.466406226158142, "loss": 1.0461, "nll_loss": 1.0304687023162842, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.08063964545726776, "rewards/margins": 0.06599120795726776, "rewards/rejected": -0.14665527641773224, "step": 1860 }, { "epoch": 0.14201632808050124, "grad_norm": 1.3505028851048735, "learning_rate": 1.849989160251521e-06, "log_odds_chosen": 1.1062500476837158, "log_odds_ratio": -0.4708496034145355, "logits/chosen": -1.0867187976837158, "logits/rejected": -0.869335949420929, "logps/chosen": -0.696972668170929, "logps/rejected": -1.477929711341858, "loss": 1.0577, "nll_loss": 1.0310547351837158, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.06972656399011612, "rewards/margins": 0.07805633544921875, "rewards/rejected": -0.14780274033546448, "step": 1870 }, { "epoch": 0.1427757736852098, "grad_norm": 1.3468713785909394, "learning_rate": 1.8450624160577701e-06, "log_odds_chosen": 1.107324242591858, "log_odds_ratio": -0.4395507872104645, "logits/chosen": -1.0908203125, "logits/rejected": -0.8714843988418579, "logps/chosen": -0.7408202886581421, "logps/rejected": -1.510156273841858, "loss": 1.065, "nll_loss": 1.006445288658142, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07403564453125, "rewards/margins": 0.076873779296875, "rewards/rejected": -0.15104980766773224, "step": 1880 }, { "epoch": 0.14353521928991836, "grad_norm": 1.4948141143409317, "learning_rate": 1.8401748249129445e-06, "log_odds_chosen": 0.968798816204071, "log_odds_ratio": -0.5001465082168579, "logits/chosen": -1.148046851158142, "logits/rejected": -0.988085925579071, "logps/chosen": -0.706347644329071, "logps/rejected": -1.349218726158142, "loss": 1.0351, "nll_loss": 0.929492175579071, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07059326022863388, "rewards/margins": 0.06419982761144638, "rewards/rejected": -0.13486328721046448, "step": 1890 }, { "epoch": 0.14429466489462692, "grad_norm": 1.4929623734476563, "learning_rate": 1.8353258709644938e-06, "log_odds_chosen": 0.881298840045929, "log_odds_ratio": -0.527148425579071, "logits/chosen": -1.0732421875, "logits/rejected": -0.9664062261581421, "logps/chosen": -0.727343738079071, "logps/rejected": -1.3244140148162842, "loss": 1.0556, "nll_loss": 1.0261719226837158, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07279052585363388, "rewards/margins": 0.05953216552734375, "rewards/rejected": -0.13229981064796448, "step": 1900 }, { "epoch": 0.14505411049933548, "grad_norm": 1.2395149585244087, "learning_rate": 1.830515047825102e-06, "log_odds_chosen": 1.09033203125, "log_odds_ratio": -0.48579102754592896, "logits/chosen": -1.0564453601837158, "logits/rejected": -0.876953125, "logps/chosen": -0.734570324420929, "logps/rejected": -1.5056641101837158, "loss": 1.0427, "nll_loss": 0.962890625, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07346191257238388, "rewards/margins": 0.07697448879480362, "rewards/rejected": -0.15041503310203552, "step": 1910 }, { "epoch": 0.14581355610404403, "grad_norm": 1.2852958935084131, "learning_rate": 1.8257418583505536e-06, "log_odds_chosen": 1.035253882408142, "log_odds_ratio": -0.46064454317092896, "logits/chosen": -1.1867187023162842, "logits/rejected": -0.912109375, "logps/chosen": -0.703320324420929, "logps/rejected": -1.4054687023162842, "loss": 1.0706, "nll_loss": 0.9447265863418579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07033691555261612, "rewards/margins": 0.07026977837085724, "rewards/rejected": -0.14060059189796448, "step": 1920 }, { "epoch": 0.14657300170875262, "grad_norm": 1.2278330382597873, "learning_rate": 1.8210058144239416e-06, "log_odds_chosen": 0.8663085699081421, "log_odds_ratio": -0.5362793207168579, "logits/chosen": -1.091406226158142, "logits/rejected": -0.890820324420929, "logps/chosen": -0.7862304449081421, "logps/rejected": -1.397070288658142, "loss": 1.062, "nll_loss": 1.0783202648162842, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07863769680261612, "rewards/margins": 0.06100921705365181, "rewards/rejected": -0.13974609971046448, "step": 1930 }, { "epoch": 0.14733244731346118, "grad_norm": 1.2850419362794372, "learning_rate": 1.816306436745999e-06, "log_odds_chosen": 1.213964819908142, "log_odds_ratio": -0.43608397245407104, "logits/chosen": -1.0974609851837158, "logits/rejected": -0.901562511920929, "logps/chosen": -0.766406238079071, "logps/rejected": -1.6238281726837158, "loss": 1.0512, "nll_loss": 0.986328125, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.07664795219898224, "rewards/margins": 0.08583984524011612, "rewards/rejected": -0.16252441704273224, "step": 1940 }, { "epoch": 0.14809189291816974, "grad_norm": 1.4408392395524106, "learning_rate": 1.8116432546313529e-06, "log_odds_chosen": 0.9551025629043579, "log_odds_ratio": -0.4977050721645355, "logits/chosen": -1.0802733898162842, "logits/rejected": -0.9701172113418579, "logps/chosen": -0.7474609613418579, "logps/rejected": -1.3630859851837158, "loss": 1.0581, "nll_loss": 1.036718726158142, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07478027045726776, "rewards/margins": 0.06166229397058487, "rewards/rejected": -0.13645020127296448, "step": 1950 }, { "epoch": 0.1488513385228783, "grad_norm": 1.19172744631118, "learning_rate": 1.8070158058105026e-06, "log_odds_chosen": 1.1218750476837158, "log_odds_ratio": -0.4734863340854645, "logits/chosen": -1.0808594226837158, "logits/rejected": -0.9712890386581421, "logps/chosen": -0.735156238079071, "logps/rejected": -1.5304687023162842, "loss": 1.06, "nll_loss": 1.057226538658142, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07349853217601776, "rewards/margins": 0.07966308295726776, "rewards/rejected": -0.15317383408546448, "step": 1960 }, { "epoch": 0.14961078412758685, "grad_norm": 1.2535965157717082, "learning_rate": 1.8024236362373315e-06, "log_odds_chosen": 1.049902319908142, "log_odds_ratio": -0.47636717557907104, "logits/chosen": -1.081445336341858, "logits/rejected": -0.9453125, "logps/chosen": -0.719921886920929, "logps/rejected": -1.4714844226837158, "loss": 1.0463, "nll_loss": 1.0119140148162842, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07188721001148224, "rewards/margins": 0.075347900390625, "rewards/rejected": -0.147216796875, "step": 1970 }, { "epoch": 0.1503702297322954, "grad_norm": 1.4552800220246505, "learning_rate": 1.7978662999019787e-06, "log_odds_chosen": 0.998669445514679, "log_odds_ratio": -0.47114259004592896, "logits/chosen": -1.054296851158142, "logits/rejected": -0.914843738079071, "logps/chosen": -0.756640613079071, "logps/rejected": -1.4705078601837158, "loss": 1.0631, "nll_loss": 0.9644531011581421, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07573242485523224, "rewards/margins": 0.07139129936695099, "rewards/rejected": -0.14702148735523224, "step": 1980 }, { "epoch": 0.151129675337004, "grad_norm": 1.2727962283857737, "learning_rate": 1.793343358648881e-06, "log_odds_chosen": 1.006250023841858, "log_odds_ratio": -0.5054687261581421, "logits/chosen": -1.0421874523162842, "logits/rejected": -0.926562488079071, "logps/chosen": -0.767578125, "logps/rejected": -1.5050780773162842, "loss": 1.0706, "nll_loss": 0.955761730670929, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.0767822265625, "rewards/margins": 0.07363281399011612, "rewards/rejected": -0.1505126953125, "step": 1990 }, { "epoch": 0.15188912094171256, "grad_norm": 1.2838008109251926, "learning_rate": 1.7888543819998317e-06, "log_odds_chosen": 1.184472680091858, "log_odds_ratio": -0.44340819120407104, "logits/chosen": -1.127343773841858, "logits/rejected": -0.925585925579071, "logps/chosen": -0.768750011920929, "logps/rejected": -1.6320312023162842, "loss": 1.0238, "nll_loss": 1.035546898841858, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07687988132238388, "rewards/margins": 0.08639679104089737, "rewards/rejected": -0.16325683891773224, "step": 2000 }, { "epoch": 0.15264856654642112, "grad_norm": 1.229047981661124, "learning_rate": 1.7843989469818819e-06, "log_odds_chosen": 0.884570300579071, "log_odds_ratio": -0.490234375, "logits/chosen": -1.067773461341858, "logits/rejected": -0.8929687738418579, "logps/chosen": -0.70654296875, "logps/rejected": -1.283203125, "loss": 1.0322, "nll_loss": 1.0009765625, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07066650688648224, "rewards/margins": 0.05765380710363388, "rewards/rejected": -0.1282958984375, "step": 2010 }, { "epoch": 0.15340801215112967, "grad_norm": 1.42523091190382, "learning_rate": 1.779976637959939e-06, "log_odds_chosen": 1.125646948814392, "log_odds_ratio": -0.4581054747104645, "logits/chosen": -1.135351538658142, "logits/rejected": -0.9429687261581421, "logps/chosen": -0.789257824420929, "logps/rejected": -1.6007812023162842, "loss": 1.0481, "nll_loss": 0.991015613079071, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07890625298023224, "rewards/margins": 0.08111725002527237, "rewards/rejected": -0.16000977158546448, "step": 2020 }, { "epoch": 0.15416745775583823, "grad_norm": 1.4010944258424316, "learning_rate": 1.7755870464739012e-06, "log_odds_chosen": 0.8477538824081421, "log_odds_ratio": -0.53076171875, "logits/chosen": -1.0203125476837158, "logits/rejected": -0.9306640625, "logps/chosen": -0.7679687738418579, "logps/rejected": -1.369140625, "loss": 1.0313, "nll_loss": 0.997265636920929, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.07681884616613388, "rewards/margins": 0.06008300930261612, "rewards/rejected": -0.13681641221046448, "step": 2030 }, { "epoch": 0.1549269033605468, "grad_norm": 1.0718192134660034, "learning_rate": 1.7712297710801907e-06, "log_odds_chosen": 0.787304699420929, "log_odds_ratio": -0.541699230670929, "logits/chosen": -1.1611328125, "logits/rejected": -0.9720703363418579, "logps/chosen": -0.7505859136581421, "logps/rejected": -1.294921875, "loss": 1.0326, "nll_loss": 0.943359375, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07510986179113388, "rewards/margins": 0.05443725734949112, "rewards/rejected": -0.12954100966453552, "step": 2040 }, { "epoch": 0.15568634896525535, "grad_norm": 1.4910441101664211, "learning_rate": 1.7669044171975444e-06, "log_odds_chosen": 1.170556664466858, "log_odds_ratio": -0.46171873807907104, "logits/chosen": -1.1105468273162842, "logits/rejected": -0.9017578363418579, "logps/chosen": -0.798828125, "logps/rejected": -1.6443359851837158, "loss": 1.0272, "nll_loss": 1.0271484851837158, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07991943508386612, "rewards/margins": 0.08437500149011612, "rewards/rejected": -0.164306640625, "step": 2050 }, { "epoch": 0.15644579456996394, "grad_norm": 1.3055818619383102, "learning_rate": 1.7626105969569268e-06, "log_odds_chosen": 0.760302722454071, "log_odds_ratio": -0.5228515863418579, "logits/chosen": -1.0968749523162842, "logits/rejected": -0.930468738079071, "logps/chosen": -0.7056640386581421, "logps/rejected": -1.1994140148162842, "loss": 1.056, "nll_loss": 0.9691406488418579, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07059326022863388, "rewards/margins": 0.04939422756433487, "rewards/rejected": -0.11997070163488388, "step": 2060 }, { "epoch": 0.1572052401746725, "grad_norm": 1.2103021449175961, "learning_rate": 1.758347929055432e-06, "log_odds_chosen": 1.1057617664337158, "log_odds_ratio": -0.4786132872104645, "logits/chosen": -1.0773437023162842, "logits/rejected": -0.931640625, "logps/chosen": -0.734570324420929, "logps/rejected": -1.5115234851837158, "loss": 1.0328, "nll_loss": 0.9671875238418579, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07349853217601776, "rewards/margins": 0.0777435302734375, "rewards/rejected": -0.15126952528953552, "step": 2070 }, { "epoch": 0.15796468577938105, "grad_norm": 1.4500487610211543, "learning_rate": 1.7541160386140582e-06, "log_odds_chosen": 1.068115234375, "log_odds_ratio": -0.46293944120407104, "logits/chosen": -1.100976586341858, "logits/rejected": -0.9248046875, "logps/chosen": -0.74462890625, "logps/rejected": -1.500390648841858, "loss": 1.0271, "nll_loss": 0.980664074420929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07445068657398224, "rewards/margins": 0.07553710788488388, "rewards/rejected": -0.1500244140625, "step": 2080 }, { "epoch": 0.1587241313840896, "grad_norm": 1.3847092777533436, "learning_rate": 1.7499145570392284e-06, "log_odds_chosen": 1.379553198814392, "log_odds_ratio": -0.4126953184604645, "logits/chosen": -1.1023437976837158, "logits/rejected": -0.9087890386581421, "logps/chosen": -0.6749023199081421, "logps/rejected": -1.63671875, "loss": 1.0082, "nll_loss": 0.9505859613418579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06751708686351776, "rewards/margins": 0.09615478664636612, "rewards/rejected": -0.16357421875, "step": 2090 }, { "epoch": 0.15948357698879817, "grad_norm": 1.2255231767620076, "learning_rate": 1.745743121887939e-06, "log_odds_chosen": 0.986035168170929, "log_odds_ratio": -0.482421875, "logits/chosen": -1.1619141101837158, "logits/rejected": -0.931835949420929, "logps/chosen": -0.7486327886581421, "logps/rejected": -1.4064452648162842, "loss": 1.0217, "nll_loss": 0.936718761920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07491455227136612, "rewards/margins": 0.06561279296875, "rewards/rejected": -0.14052733778953552, "step": 2100 }, { "epoch": 0.16024302259350673, "grad_norm": 1.3181656178772425, "learning_rate": 1.7416013767364324e-06, "log_odds_chosen": 1.384374976158142, "log_odds_ratio": -0.4287109375, "logits/chosen": -1.067968726158142, "logits/rejected": -0.9115234613418579, "logps/chosen": -0.722460925579071, "logps/rejected": -1.7578125, "loss": 1.0297, "nll_loss": 0.8949218988418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07222900539636612, "rewards/margins": 0.10349120944738388, "rewards/rejected": -0.17561034858226776, "step": 2110 }, { "epoch": 0.16100246819821531, "grad_norm": 1.5457541625315203, "learning_rate": 1.7374889710522776e-06, "log_odds_chosen": 1.0802733898162842, "log_odds_ratio": -0.5318359136581421, "logits/chosen": -1.0861327648162842, "logits/rejected": -0.8974609375, "logps/chosen": -0.7718750238418579, "logps/rejected": -1.564453125, "loss": 1.051, "nll_loss": 1.0001952648162842, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0771484375, "rewards/margins": 0.079376220703125, "rewards/rejected": -0.15654297173023224, "step": 2120 }, { "epoch": 0.16176191380292387, "grad_norm": 1.4443034601091194, "learning_rate": 1.7334055600697579e-06, "log_odds_chosen": 0.991943359375, "log_odds_ratio": -0.4903320372104645, "logits/chosen": -1.003320336341858, "logits/rejected": -0.901171863079071, "logps/chosen": -0.8392578363418579, "logps/rejected": -1.5167968273162842, "loss": 1.0497, "nll_loss": 0.9964843988418579, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08403320610523224, "rewards/margins": 0.06776122748851776, "rewards/rejected": -0.15180663764476776, "step": 2130 }, { "epoch": 0.16252135940763243, "grad_norm": 1.3490237339510445, "learning_rate": 1.7293508046684678e-06, "log_odds_chosen": 1.081201195716858, "log_odds_ratio": -0.46977537870407104, "logits/chosen": -1.108789086341858, "logits/rejected": -0.9175781011581421, "logps/chosen": -0.739062488079071, "logps/rejected": -1.5068359375, "loss": 1.0271, "nll_loss": 1.0710937976837158, "rewards/accuracies": 0.75, "rewards/chosen": -0.07393798977136612, "rewards/margins": 0.07680054008960724, "rewards/rejected": -0.15070800483226776, "step": 2140 }, { "epoch": 0.163280805012341, "grad_norm": 1.2944967013194153, "learning_rate": 1.7253243712550145e-06, "log_odds_chosen": 1.036718726158142, "log_odds_ratio": -0.4677734375, "logits/chosen": -1.120703101158142, "logits/rejected": -0.931640625, "logps/chosen": -0.773730456829071, "logps/rejected": -1.513281226158142, "loss": 1.0164, "nll_loss": 0.9820312261581421, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07733154296875, "rewards/margins": 0.0738677978515625, "rewards/rejected": -0.15122070908546448, "step": 2150 }, { "epoch": 0.16404025061704955, "grad_norm": 1.3046998450508367, "learning_rate": 1.7213259316477406e-06, "log_odds_chosen": 0.760937511920929, "log_odds_ratio": -0.5547851324081421, "logits/chosen": -1.0556640625, "logits/rejected": -0.946484386920929, "logps/chosen": -0.771289050579071, "logps/rejected": -1.2919921875, "loss": 1.0454, "nll_loss": 1.010156273841858, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07705078274011612, "rewards/margins": 0.052097320556640625, "rewards/rejected": -0.12924805283546448, "step": 2160 }, { "epoch": 0.1647996962217581, "grad_norm": 1.452242202257246, "learning_rate": 1.7173551629643674e-06, "log_odds_chosen": 1.1220703125, "log_odds_ratio": -0.511523425579071, "logits/chosen": -1.100195288658142, "logits/rejected": -0.9410156011581421, "logps/chosen": -0.7666015625, "logps/rejected": -1.6337890625, "loss": 1.0117, "nll_loss": 0.951953113079071, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0765380859375, "rewards/margins": 0.08681335300207138, "rewards/rejected": -0.16335448622703552, "step": 2170 }, { "epoch": 0.16555914182646667, "grad_norm": 1.704373959908454, "learning_rate": 1.713411747512477e-06, "log_odds_chosen": 1.04052734375, "log_odds_ratio": -0.5135742425918579, "logits/chosen": -1.117578148841858, "logits/rejected": -0.920703113079071, "logps/chosen": -0.692187488079071, "logps/rejected": -1.4246094226837158, "loss": 1.0367, "nll_loss": 0.964648425579071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06920166313648224, "rewards/margins": 0.0731201171875, "rewards/rejected": -0.142578125, "step": 2180 }, { "epoch": 0.16631858743117525, "grad_norm": 1.517651811080025, "learning_rate": 1.709495372682753e-06, "log_odds_chosen": 1.2109375, "log_odds_ratio": -0.4371093809604645, "logits/chosen": -1.127343773841858, "logits/rejected": -0.982226550579071, "logps/chosen": -0.751953125, "logps/rejected": -1.647851586341858, "loss": 1.0295, "nll_loss": 1.001953125, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07518310844898224, "rewards/margins": 0.08953247219324112, "rewards/rejected": -0.164794921875, "step": 2190 }, { "epoch": 0.1670780330358838, "grad_norm": 1.5051850147274737, "learning_rate": 1.7056057308448832e-06, "log_odds_chosen": 1.158300757408142, "log_odds_ratio": -0.48115235567092896, "logits/chosen": -1.1003906726837158, "logits/rejected": -0.9369140863418579, "logps/chosen": -0.7010742425918579, "logps/rejected": -1.514062523841858, "loss": 1.0457, "nll_loss": 0.963085949420929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.070068359375, "rewards/margins": 0.08127593994140625, "rewards/rejected": -0.1513671875, "step": 2200 }, { "epoch": 0.16783747864059237, "grad_norm": 1.3687701042117044, "learning_rate": 1.701742519246068e-06, "log_odds_chosen": 1.101904273033142, "log_odds_ratio": -0.500195324420929, "logits/chosen": -1.1736328601837158, "logits/rejected": -0.984375, "logps/chosen": -0.7425781488418579, "logps/rejected": -1.4931640625, "loss": 1.0225, "nll_loss": 0.9330078363418579, "rewards/accuracies": 0.75, "rewards/chosen": -0.07423095405101776, "rewards/margins": 0.07521667331457138, "rewards/rejected": -0.14926758408546448, "step": 2210 }, { "epoch": 0.16859692424530093, "grad_norm": 1.723744503708798, "learning_rate": 1.6979054399120355e-06, "log_odds_chosen": 0.8772217035293579, "log_odds_ratio": -0.514843761920929, "logits/chosen": -1.129296898841858, "logits/rejected": -0.9527343511581421, "logps/chosen": -0.7265625, "logps/rejected": -1.3224608898162842, "loss": 1.0475, "nll_loss": 0.975390613079071, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07271728664636612, "rewards/margins": 0.059500884264707565, "rewards/rejected": -0.13222655653953552, "step": 2220 }, { "epoch": 0.16935636985000949, "grad_norm": 1.4282182519929785, "learning_rate": 1.6940941995505069e-06, "log_odds_chosen": 0.887402355670929, "log_odds_ratio": -0.549511730670929, "logits/chosen": -1.075585961341858, "logits/rejected": -0.965039074420929, "logps/chosen": -0.7665039300918579, "logps/rejected": -1.3660156726837158, "loss": 1.0481, "nll_loss": 0.9765625, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07662353664636612, "rewards/margins": 0.06000366061925888, "rewards/rejected": -0.13671875, "step": 2230 }, { "epoch": 0.17011581545471804, "grad_norm": 1.3515584039391881, "learning_rate": 1.6903085094570331e-06, "log_odds_chosen": 1.185461401939392, "log_odds_ratio": -0.44868165254592896, "logits/chosen": -1.078125, "logits/rejected": -0.918164074420929, "logps/chosen": -0.691601574420929, "logps/rejected": -1.5216796398162842, "loss": 1.0246, "nll_loss": 0.9400390386581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.069091796875, "rewards/margins": 0.08315811306238174, "rewards/rejected": -0.15219727158546448, "step": 2240 }, { "epoch": 0.17087526105942663, "grad_norm": 1.7983862082406363, "learning_rate": 1.6865480854231356e-06, "log_odds_chosen": 1.1281249523162842, "log_odds_ratio": -0.43486326932907104, "logits/chosen": -1.118554711341858, "logits/rejected": -0.8978515863418579, "logps/chosen": -0.6834961175918579, "logps/rejected": -1.425390601158142, "loss": 1.0279, "nll_loss": 0.929882824420929, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06834717094898224, "rewards/margins": 0.07416381686925888, "rewards/rejected": -0.14252929389476776, "step": 2250 }, { "epoch": 0.1716347066641352, "grad_norm": 1.5497690081445343, "learning_rate": 1.682812647646685e-06, "log_odds_chosen": 0.9833984375, "log_odds_ratio": -0.4872070252895355, "logits/chosen": -1.0832030773162842, "logits/rejected": -0.9341796636581421, "logps/chosen": -0.7431640625, "logps/rejected": -1.4109375476837158, "loss": 1.0101, "nll_loss": 0.948437511920929, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0743408203125, "rewards/margins": 0.06674804538488388, "rewards/rejected": -0.14106445014476776, "step": 2260 }, { "epoch": 0.17239415226884375, "grad_norm": 1.4169437787305321, "learning_rate": 1.6791019206444541e-06, "log_odds_chosen": 1.302148461341858, "log_odds_ratio": -0.38823240995407104, "logits/chosen": -1.115820288658142, "logits/rejected": -0.9019531011581421, "logps/chosen": -0.684765636920929, "logps/rejected": -1.6121094226837158, "loss": 1.0292, "nll_loss": 0.9810546636581421, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.0684814453125, "rewards/margins": 0.09268798679113388, "rewards/rejected": -0.16123047471046448, "step": 2270 }, { "epoch": 0.1731535978735523, "grad_norm": 1.4214154869032998, "learning_rate": 1.675415633166782e-06, "log_odds_chosen": 1.1916015148162842, "log_odds_ratio": -0.48198240995407104, "logits/chosen": -1.096289038658142, "logits/rejected": -0.88671875, "logps/chosen": -0.760546863079071, "logps/rejected": -1.599023461341858, "loss": 1.0476, "nll_loss": 0.9966796636581421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07607422024011612, "rewards/margins": 0.0839080810546875, "rewards/rejected": -0.159912109375, "step": 2280 }, { "epoch": 0.17391304347826086, "grad_norm": 1.5714646534516068, "learning_rate": 1.6717535181142914e-06, "log_odds_chosen": 0.769274890422821, "log_odds_ratio": -0.5411621332168579, "logits/chosen": -1.092187523841858, "logits/rejected": -0.978515625, "logps/chosen": -0.7583984136581421, "logps/rejected": -1.275781273841858, "loss": 1.0559, "nll_loss": 1.0068359375, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07584228366613388, "rewards/margins": 0.05177917331457138, "rewards/rejected": -0.12763671576976776, "step": 2290 }, { "epoch": 0.17467248908296942, "grad_norm": 1.3906808108466515, "learning_rate": 1.668115312456598e-06, "log_odds_chosen": 1.104882836341858, "log_odds_ratio": -0.45341795682907104, "logits/chosen": -1.0654296875, "logits/rejected": -0.951367199420929, "logps/chosen": -0.741992175579071, "logps/rejected": -1.504296898841858, "loss": 1.0311, "nll_loss": 1.004492163658142, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07419433444738388, "rewards/margins": 0.07623291015625, "rewards/rejected": -0.150390625, "step": 2300 }, { "epoch": 0.17543193468767798, "grad_norm": 1.3595463589756867, "learning_rate": 1.6645007571529578e-06, "log_odds_chosen": 1.088281273841858, "log_odds_ratio": -0.4876464903354645, "logits/chosen": -1.053125023841858, "logits/rejected": -0.943359375, "logps/chosen": -0.7705078125, "logps/rejected": -1.549414038658142, "loss": 1.0248, "nll_loss": 1.010644555091858, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07711181789636612, "rewards/margins": 0.07790832221508026, "rewards/rejected": -0.15493163466453552, "step": 2310 }, { "epoch": 0.17619138029238657, "grad_norm": 1.4114020499902358, "learning_rate": 1.6609095970747992e-06, "log_odds_chosen": 1.150390625, "log_odds_ratio": -0.48417967557907104, "logits/chosen": -1.1541016101837158, "logits/rejected": -0.9486328363418579, "logps/chosen": -0.8031250238418579, "logps/rejected": -1.689453125, "loss": 1.015, "nll_loss": 1.004492163658142, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08027343451976776, "rewards/margins": 0.08864746242761612, "rewards/rejected": -0.16889648139476776, "step": 2320 }, { "epoch": 0.17695082589709513, "grad_norm": 1.393187466581412, "learning_rate": 1.6573415809300833e-06, "log_odds_chosen": 1.2024414539337158, "log_odds_ratio": -0.4300293028354645, "logits/chosen": -1.0919921398162842, "logits/rejected": -0.918749988079071, "logps/chosen": -0.711718738079071, "logps/rejected": -1.5851562023162842, "loss": 1.0107, "nll_loss": 0.964062511920929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07110595703125, "rewards/margins": 0.08736572414636612, "rewards/rejected": -0.1585693359375, "step": 2330 }, { "epoch": 0.17771027150180368, "grad_norm": 2.029312352243631, "learning_rate": 1.6537964611894462e-06, "log_odds_chosen": 1.013330101966858, "log_odds_ratio": -0.515332043170929, "logits/chosen": -1.0654296875, "logits/rejected": -0.887499988079071, "logps/chosen": -0.8050781488418579, "logps/rejected": -1.560546875, "loss": 1.0261, "nll_loss": 0.9818359613418579, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.08050537109375, "rewards/margins": 0.07549743354320526, "rewards/rejected": -0.15610352158546448, "step": 2340 }, { "epoch": 0.17846971710651224, "grad_norm": 1.3114345385494288, "learning_rate": 1.6502739940140692e-06, "log_odds_chosen": 0.9976562261581421, "log_odds_ratio": -0.4742187559604645, "logits/chosen": -1.0568358898162842, "logits/rejected": -0.8990234136581421, "logps/chosen": -0.7515624761581421, "logps/rejected": -1.413476586341858, "loss": 1.0269, "nll_loss": 0.9208984375, "rewards/accuracies": 0.75, "rewards/chosen": -0.07514648139476776, "rewards/margins": 0.06619872897863388, "rewards/rejected": -0.14140625298023224, "step": 2350 }, { "epoch": 0.1792291627112208, "grad_norm": 1.4048932317431904, "learning_rate": 1.6467739391852364e-06, "log_odds_chosen": 1.17431640625, "log_odds_ratio": -0.4451660215854645, "logits/chosen": -0.9830077886581421, "logits/rejected": -0.885937511920929, "logps/chosen": -0.744335949420929, "logps/rejected": -1.5654296875, "loss": 1.0438, "nll_loss": 1.0656249523162842, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07454834133386612, "rewards/margins": 0.08200683444738388, "rewards/rejected": -0.156494140625, "step": 2360 }, { "epoch": 0.17998860831592936, "grad_norm": 1.4140466590428853, "learning_rate": 1.6432960600355221e-06, "log_odds_chosen": 1.089453101158142, "log_odds_ratio": -0.4378906190395355, "logits/chosen": -1.062890648841858, "logits/rejected": -0.936328113079071, "logps/chosen": -0.7796875238418579, "logps/rejected": -1.556640625, "loss": 1.0465, "nll_loss": 0.9878906011581421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07795409858226776, "rewards/margins": 0.077850341796875, "rewards/rejected": -0.15573731064796448, "step": 2370 }, { "epoch": 0.18074805392063795, "grad_norm": 1.4131599698548685, "learning_rate": 1.6398401233815756e-06, "log_odds_chosen": 0.913330078125, "log_odds_ratio": -0.50830078125, "logits/chosen": -1.087499976158142, "logits/rejected": -0.9404296875, "logps/chosen": -0.774609386920929, "logps/rejected": -1.37109375, "loss": 1.0151, "nll_loss": 0.94921875, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07742919772863388, "rewards/margins": 0.0596771240234375, "rewards/rejected": -0.13720703125, "step": 2380 }, { "epoch": 0.1815074995253465, "grad_norm": 1.2593423446717298, "learning_rate": 1.6364058994584524e-06, "log_odds_chosen": 0.936145007610321, "log_odds_ratio": -0.47490233182907104, "logits/chosen": -1.1056640148162842, "logits/rejected": -0.9078124761581421, "logps/chosen": -0.7705078125, "logps/rejected": -1.4099609851837158, "loss": 1.0319, "nll_loss": 0.965039074420929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07708740234375, "rewards/margins": 0.06393890082836151, "rewards/rejected": -0.14108887314796448, "step": 2390 }, { "epoch": 0.18226694513005506, "grad_norm": 1.4830022326753702, "learning_rate": 1.6329931618554522e-06, "log_odds_chosen": 1.0566284656524658, "log_odds_ratio": -0.47368162870407104, "logits/chosen": -1.0343749523162842, "logits/rejected": -0.9457031488418579, "logps/chosen": -0.7265625, "logps/rejected": -1.486914038658142, "loss": 1.0277, "nll_loss": 0.976757824420929, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07265625149011612, "rewards/margins": 0.076019287109375, "rewards/rejected": -0.14873047173023224, "step": 2400 }, { "epoch": 0.18302639073476362, "grad_norm": 1.5332525641485273, "learning_rate": 1.6296016874534209e-06, "log_odds_chosen": 0.999707043170929, "log_odds_ratio": -0.502148449420929, "logits/chosen": -1.1003906726837158, "logits/rejected": -0.949999988079071, "logps/chosen": -0.7437499761581421, "logps/rejected": -1.4455077648162842, "loss": 1.0184, "nll_loss": 1.015039086341858, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.07432861626148224, "rewards/margins": 0.07005615532398224, "rewards/rejected": -0.14448241889476776, "step": 2410 }, { "epoch": 0.18378583633947218, "grad_norm": 1.3461728382636295, "learning_rate": 1.6262312563634835e-06, "log_odds_chosen": 1.036376953125, "log_odds_ratio": -0.4654296934604645, "logits/chosen": -1.074804663658142, "logits/rejected": -0.9583984613418579, "logps/chosen": -0.7646484375, "logps/rejected": -1.483007788658142, "loss": 1.0012, "nll_loss": 0.9583984613418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07645263522863388, "rewards/margins": 0.07184753566980362, "rewards/rejected": -0.14838866889476776, "step": 2420 }, { "epoch": 0.18454528194418074, "grad_norm": 1.5928196093040912, "learning_rate": 1.6228816518671587e-06, "log_odds_chosen": 1.077539086341858, "log_odds_ratio": -0.5020996332168579, "logits/chosen": -1.191992163658142, "logits/rejected": -1.0115234851837158, "logps/chosen": -0.765820324420929, "logps/rejected": -1.5263671875, "loss": 1.0272, "nll_loss": 0.970507800579071, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07658691704273224, "rewards/margins": 0.07609252631664276, "rewards/rejected": -0.1527099609375, "step": 2430 }, { "epoch": 0.1853047275488893, "grad_norm": 1.3805742418842342, "learning_rate": 1.619552660357832e-06, "log_odds_chosen": 1.1140868663787842, "log_odds_ratio": -0.4735351502895355, "logits/chosen": -1.1759765148162842, "logits/rejected": -0.997851550579071, "logps/chosen": -0.704882800579071, "logps/rejected": -1.481054663658142, "loss": 1.0184, "nll_loss": 0.970898449420929, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07048340141773224, "rewards/margins": 0.07758788764476776, "rewards/rejected": -0.14809569716453552, "step": 2440 }, { "epoch": 0.18606417315359788, "grad_norm": 1.386941628081624, "learning_rate": 1.616244071283537e-06, "log_odds_chosen": 1.1019775867462158, "log_odds_ratio": -0.47431641817092896, "logits/chosen": -1.028710961341858, "logits/rejected": -0.917187511920929, "logps/chosen": -0.774218738079071, "logps/rejected": -1.5789062976837158, "loss": 1.0117, "nll_loss": 1.0402343273162842, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07736816257238388, "rewards/margins": 0.08047179877758026, "rewards/rejected": -0.15786132216453552, "step": 2450 }, { "epoch": 0.18682361875830644, "grad_norm": 1.378516115009412, "learning_rate": 1.6129556770910235e-06, "log_odds_chosen": 1.0225098133087158, "log_odds_ratio": -0.504589855670929, "logits/chosen": -1.046875, "logits/rejected": -0.911328136920929, "logps/chosen": -0.7181640863418579, "logps/rejected": -1.444921851158142, "loss": 1.0301, "nll_loss": 0.9583984613418579, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07183837890625, "rewards/margins": 0.07269592583179474, "rewards/rejected": -0.14448241889476776, "step": 2460 }, { "epoch": 0.187583064363015, "grad_norm": 1.296793839292042, "learning_rate": 1.6096872731710673e-06, "log_odds_chosen": 1.154296875, "log_odds_ratio": -0.4408203065395355, "logits/chosen": -1.0779297351837158, "logits/rejected": -0.908398449420929, "logps/chosen": -0.67724609375, "logps/rejected": -1.4533202648162842, "loss": 1.0365, "nll_loss": 0.9820312261581421, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06771240383386612, "rewards/margins": 0.07769775390625, "rewards/rejected": -0.14533691108226776, "step": 2470 }, { "epoch": 0.18834250996772356, "grad_norm": 1.2803039122769662, "learning_rate": 1.6064386578049978e-06, "log_odds_chosen": 1.0380127429962158, "log_odds_ratio": -0.4811035096645355, "logits/chosen": -1.26171875, "logits/rejected": -1.041015625, "logps/chosen": -0.7398437261581421, "logps/rejected": -1.4914062023162842, "loss": 1.0108, "nll_loss": 0.9271484613418579, "rewards/accuracies": 0.75, "rewards/chosen": -0.07402344048023224, "rewards/margins": 0.075042724609375, "rewards/rejected": -0.14907225966453552, "step": 2480 }, { "epoch": 0.18910195557243212, "grad_norm": 1.4762736908221168, "learning_rate": 1.6032096321124046e-06, "log_odds_chosen": 1.035791039466858, "log_odds_ratio": -0.48066407442092896, "logits/chosen": -1.1261718273162842, "logits/rejected": -0.9595702886581421, "logps/chosen": -0.745898425579071, "logps/rejected": -1.4738280773162842, "loss": 1.0238, "nll_loss": 1.024023413658142, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.0745849609375, "rewards/margins": 0.072784423828125, "rewards/rejected": -0.14738769829273224, "step": 2490 }, { "epoch": 0.18986140117714068, "grad_norm": 1.403556755299548, "learning_rate": 1.6e-06, "log_odds_chosen": 0.969555675983429, "log_odds_ratio": -0.49873048067092896, "logits/chosen": -1.123046875, "logits/rejected": -1.022851586341858, "logps/chosen": -0.788281261920929, "logps/rejected": -1.490625023841858, "loss": 1.037, "nll_loss": 1.0041015148162842, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07874755561351776, "rewards/margins": 0.070159912109375, "rewards/rejected": -0.1490478515625, "step": 2500 }, { "epoch": 0.19062084678184926, "grad_norm": 1.2399937251083246, "learning_rate": 1.5968095681115984e-06, "log_odds_chosen": 0.9971679449081421, "log_odds_ratio": -0.510449230670929, "logits/chosen": -1.162109375, "logits/rejected": -0.9537109136581421, "logps/chosen": -0.735156238079071, "logps/rejected": -1.437890648841858, "loss": 1.0335, "nll_loss": 0.96875, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07362060248851776, "rewards/margins": 0.07034911960363388, "rewards/rejected": -0.14384765923023224, "step": 2510 }, { "epoch": 0.19138029238655782, "grad_norm": 1.5795691020074083, "learning_rate": 1.5936381457791914e-06, "log_odds_chosen": 1.2873046398162842, "log_odds_ratio": -0.3907226622104645, "logits/chosen": -1.1730468273162842, "logits/rejected": -0.9466797113418579, "logps/chosen": -0.7109375, "logps/rejected": -1.635156273841858, "loss": 1.0115, "nll_loss": 0.9588867425918579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07115478813648224, "rewards/margins": 0.0924072265625, "rewards/rejected": -0.16354981064796448, "step": 2520 }, { "epoch": 0.19213973799126638, "grad_norm": 1.3025827241955927, "learning_rate": 1.590485544975088e-06, "log_odds_chosen": 1.0261719226837158, "log_odds_ratio": -0.498046875, "logits/chosen": -1.0822265148162842, "logits/rejected": -0.9482421875, "logps/chosen": -0.7718750238418579, "logps/rejected": -1.505859375, "loss": 1.026, "nll_loss": 1.012109398841858, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07718505710363388, "rewards/margins": 0.073333740234375, "rewards/rejected": -0.15056152641773224, "step": 2530 }, { "epoch": 0.19289918359597494, "grad_norm": 1.3370582895253371, "learning_rate": 1.5873515802650901e-06, "log_odds_chosen": 1.096093773841858, "log_odds_ratio": -0.4529785215854645, "logits/chosen": -1.121679663658142, "logits/rejected": -0.9710937738418579, "logps/chosen": -0.7119140625, "logps/rejected": -1.466796875, "loss": 1.0183, "nll_loss": 0.958984375, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07117919623851776, "rewards/margins": 0.07547607272863388, "rewards/rejected": -0.14665527641773224, "step": 2540 }, { "epoch": 0.1936586292006835, "grad_norm": 1.4234966426655595, "learning_rate": 1.584236068762679e-06, "log_odds_chosen": 1.141357421875, "log_odds_ratio": -0.4454101622104645, "logits/chosen": -1.1150391101837158, "logits/rejected": -0.94384765625, "logps/chosen": -0.76708984375, "logps/rejected": -1.577734351158142, "loss": 1.0451, "nll_loss": 1.036718726158142, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07666015625, "rewards/margins": 0.08110351860523224, "rewards/rejected": -0.15778808295726776, "step": 2550 }, { "epoch": 0.19441807480539205, "grad_norm": 1.3868843138602542, "learning_rate": 1.5811388300841894e-06, "log_odds_chosen": 1.1547362804412842, "log_odds_ratio": -0.45966798067092896, "logits/chosen": -1.118554711341858, "logits/rejected": -0.91796875, "logps/chosen": -0.7080078125, "logps/rejected": -1.515039086341858, "loss": 1.0272, "nll_loss": 1.03125, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07082519680261612, "rewards/margins": 0.08066711574792862, "rewards/rejected": -0.15153808891773224, "step": 2560 }, { "epoch": 0.1951775204101006, "grad_norm": 1.4664083987818413, "learning_rate": 1.5780596863049431e-06, "log_odds_chosen": 1.0954101085662842, "log_odds_ratio": -0.508740246295929, "logits/chosen": -1.1560547351837158, "logits/rejected": -1.0125000476837158, "logps/chosen": -0.7337890863418579, "logps/rejected": -1.523828148841858, "loss": 1.0086, "nll_loss": 0.9515625238418579, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07338867336511612, "rewards/margins": 0.079071044921875, "rewards/rejected": -0.15244141221046448, "step": 2570 }, { "epoch": 0.1959369660148092, "grad_norm": 1.2265091715728655, "learning_rate": 1.5749984619163156e-06, "log_odds_chosen": 1.0320312976837158, "log_odds_ratio": -0.47314453125, "logits/chosen": -1.0656249523162842, "logits/rejected": -0.8873046636581421, "logps/chosen": -0.746874988079071, "logps/rejected": -1.480859398841858, "loss": 1.0188, "nll_loss": 1.037695288658142, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07475586235523224, "rewards/margins": 0.0733642578125, "rewards/rejected": -0.14802245795726776, "step": 2580 }, { "epoch": 0.19669641161951776, "grad_norm": 1.4464102596501267, "learning_rate": 1.5719549837837187e-06, "log_odds_chosen": 1.154687523841858, "log_odds_ratio": -0.43798828125, "logits/chosen": -1.039648413658142, "logits/rejected": -0.871289074420929, "logps/chosen": -0.732128918170929, "logps/rejected": -1.55078125, "loss": 1.0064, "nll_loss": 0.9716796875, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.0731201171875, "rewards/margins": 0.08182372897863388, "rewards/rejected": -0.155029296875, "step": 2590 }, { "epoch": 0.19745585722422632, "grad_norm": 1.3966886747312097, "learning_rate": 1.5689290811054722e-06, "log_odds_chosen": 0.9362548589706421, "log_odds_ratio": -0.5069824457168579, "logits/chosen": -1.096289038658142, "logits/rejected": -0.919921875, "logps/chosen": -0.787304699420929, "logps/rejected": -1.462304711341858, "loss": 1.0194, "nll_loss": 0.9339843988418579, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07871093600988388, "rewards/margins": 0.06745605170726776, "rewards/rejected": -0.14616699516773224, "step": 2600 }, { "epoch": 0.19821530282893488, "grad_norm": 1.346617138214625, "learning_rate": 1.5659205853725426e-06, "log_odds_chosen": 0.963134765625, "log_odds_ratio": -0.515917956829071, "logits/chosen": -1.0294921398162842, "logits/rejected": -0.8648437261581421, "logps/chosen": -0.7220703363418579, "logps/rejected": -1.393945336341858, "loss": 1.0174, "nll_loss": 0.9560546875, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07222900539636612, "rewards/margins": 0.06725387275218964, "rewards/rejected": -0.13947753608226776, "step": 2610 }, { "epoch": 0.19897474843364343, "grad_norm": 1.4319499560097193, "learning_rate": 1.562929330329127e-06, "log_odds_chosen": 0.9051513671875, "log_odds_ratio": -0.55712890625, "logits/chosen": -1.014257788658142, "logits/rejected": -0.890820324420929, "logps/chosen": -0.746777355670929, "logps/rejected": -1.3449218273162842, "loss": 1.0361, "nll_loss": 1.02734375, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0745849609375, "rewards/margins": 0.05976714938879013, "rewards/rejected": -0.13437500596046448, "step": 2620 }, { "epoch": 0.199734194038352, "grad_norm": 1.3121314749158555, "learning_rate": 1.5599551519340636e-06, "log_odds_chosen": 1.02667236328125, "log_odds_ratio": -0.48750001192092896, "logits/chosen": -1.081445336341858, "logits/rejected": -0.9085937738418579, "logps/chosen": -0.7294921875, "logps/rejected": -1.4255859851837158, "loss": 1.0208, "nll_loss": 0.953125, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07293701171875, "rewards/margins": 0.06976928561925888, "rewards/rejected": -0.1427001953125, "step": 2630 }, { "epoch": 0.20049363964306058, "grad_norm": 1.3260088561449281, "learning_rate": 1.556997888323046e-06, "log_odds_chosen": 0.997302234172821, "log_odds_ratio": -0.518505871295929, "logits/chosen": -1.046484351158142, "logits/rejected": -0.885937511920929, "logps/chosen": -0.7749999761581421, "logps/rejected": -1.505859375, "loss": 1.0123, "nll_loss": 0.997265636920929, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.0775146484375, "rewards/margins": 0.07315368950366974, "rewards/rejected": -0.15056152641773224, "step": 2640 }, { "epoch": 0.20125308524776914, "grad_norm": 1.4485709431979326, "learning_rate": 1.5540573797716226e-06, "log_odds_chosen": 1.2283203601837158, "log_odds_ratio": -0.42021483182907104, "logits/chosen": -1.0333983898162842, "logits/rejected": -0.8994140625, "logps/chosen": -0.7490234375, "logps/rejected": -1.6326172351837158, "loss": 1.0306, "nll_loss": 0.950976550579071, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07485351711511612, "rewards/margins": 0.08839111030101776, "rewards/rejected": -0.16318359971046448, "step": 2650 }, { "epoch": 0.2020125308524777, "grad_norm": 1.474105815325273, "learning_rate": 1.5511334686589623e-06, "log_odds_chosen": 1.0120117664337158, "log_odds_ratio": -0.511523425579071, "logits/chosen": -1.188867211341858, "logits/rejected": -0.9888671636581421, "logps/chosen": -0.759765625, "logps/rejected": -1.4832031726837158, "loss": 1.0281, "nll_loss": 0.949999988079071, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07603760063648224, "rewards/margins": 0.072265625, "rewards/rejected": -0.148193359375, "step": 2660 }, { "epoch": 0.20277197645718625, "grad_norm": 1.4719491131113853, "learning_rate": 1.548225999432367e-06, "log_odds_chosen": 1.0242431163787842, "log_odds_ratio": -0.45820313692092896, "logits/chosen": -1.148828148841858, "logits/rejected": -0.938281238079071, "logps/chosen": -0.7486327886581421, "logps/rejected": -1.435937523841858, "loss": 1.0106, "nll_loss": 0.994335949420929, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07485351711511612, "rewards/margins": 0.0688323974609375, "rewards/rejected": -0.1435546875, "step": 2670 }, { "epoch": 0.2035314220618948, "grad_norm": 1.3112081578081516, "learning_rate": 1.5453348185725114e-06, "log_odds_chosen": 1.3245117664337158, "log_odds_ratio": -0.4132324159145355, "logits/chosen": -1.0390625, "logits/rejected": -0.8636718988418579, "logps/chosen": -0.7919921875, "logps/rejected": -1.74609375, "loss": 1.0175, "nll_loss": 0.9664062261581421, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07918701320886612, "rewards/margins": 0.09534911811351776, "rewards/rejected": -0.17458495497703552, "step": 2680 }, { "epoch": 0.20429086766660337, "grad_norm": 1.5751358274321592, "learning_rate": 1.542459774559398e-06, "log_odds_chosen": 0.9666992425918579, "log_odds_ratio": -0.4878906309604645, "logits/chosen": -1.115820288658142, "logits/rejected": -0.9580078125, "logps/chosen": -0.7640625238418579, "logps/rejected": -1.437890648841858, "loss": 1.0357, "nll_loss": 0.9906250238418579, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07644043117761612, "rewards/margins": 0.06744384765625, "rewards/rejected": -0.14394530653953552, "step": 2690 }, { "epoch": 0.20505031327131193, "grad_norm": 1.3476092754412672, "learning_rate": 1.539600717839002e-06, "log_odds_chosen": 1.1828124523162842, "log_odds_ratio": -0.46220701932907104, "logits/chosen": -1.0730469226837158, "logits/rejected": -0.9117187261581421, "logps/chosen": -0.797070324420929, "logps/rejected": -1.648046851158142, "loss": 1.0019, "nll_loss": 1.053320288658142, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0797119140625, "rewards/margins": 0.08511962741613388, "rewards/rejected": -0.16496582329273224, "step": 2700 }, { "epoch": 0.20580975887602052, "grad_norm": 1.3976287136239551, "learning_rate": 1.536757500790597e-06, "log_odds_chosen": 1.1285400390625, "log_odds_ratio": -0.468017578125, "logits/chosen": -1.112890601158142, "logits/rejected": -0.87890625, "logps/chosen": -0.7496093511581421, "logps/rejected": -1.5822265148162842, "loss": 1.007, "nll_loss": 1.003320336341858, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07490234076976776, "rewards/margins": 0.08319854736328125, "rewards/rejected": -0.15815429389476776, "step": 2710 }, { "epoch": 0.20656920448072907, "grad_norm": 1.4210871799329383, "learning_rate": 1.5339299776947407e-06, "log_odds_chosen": 0.951855480670929, "log_odds_ratio": -0.50048828125, "logits/chosen": -1.0681641101837158, "logits/rejected": -0.9292968511581421, "logps/chosen": -0.7349609136581421, "logps/rejected": -1.361718773841858, "loss": 1.044, "nll_loss": 0.8935546875, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07354736328125, "rewards/margins": 0.06262512505054474, "rewards/rejected": -0.13601073622703552, "step": 2720 }, { "epoch": 0.20732865008543763, "grad_norm": 1.35290181226902, "learning_rate": 1.5311180047019054e-06, "log_odds_chosen": 1.288964867591858, "log_odds_ratio": -0.41923826932907104, "logits/chosen": -1.0955078601837158, "logits/rejected": -0.9134765863418579, "logps/chosen": -0.687792956829071, "logps/rejected": -1.5789062976837158, "loss": 1.0107, "nll_loss": 0.8926757574081421, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06877441704273224, "rewards/margins": 0.08914794772863388, "rewards/rejected": -0.15788574516773224, "step": 2730 }, { "epoch": 0.2080880956901462, "grad_norm": 1.7225024190588087, "learning_rate": 1.5283214398017402e-06, "log_odds_chosen": 0.966723620891571, "log_odds_ratio": -0.5398925542831421, "logits/chosen": -1.037500023841858, "logits/rejected": -0.8970702886581421, "logps/chosen": -0.8119140863418579, "logps/rejected": -1.500585913658142, "loss": 1.0314, "nll_loss": 0.956250011920929, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0811767578125, "rewards/margins": 0.06902465969324112, "rewards/rejected": -0.15017089247703552, "step": 2740 }, { "epoch": 0.20884754129485475, "grad_norm": 1.4767362565695075, "learning_rate": 1.5255401427929477e-06, "log_odds_chosen": 1.047265648841858, "log_odds_ratio": -0.47368162870407104, "logits/chosen": -1.081445336341858, "logits/rejected": -0.9111328125, "logps/chosen": -0.732226550579071, "logps/rejected": -1.4539062976837158, "loss": 1.0085, "nll_loss": 0.9853515625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0732421875, "rewards/margins": 0.07211913913488388, "rewards/rejected": -0.14523926377296448, "step": 2750 }, { "epoch": 0.2096069868995633, "grad_norm": 1.6297896108186416, "learning_rate": 1.5227739752537617e-06, "log_odds_chosen": 1.0001099109649658, "log_odds_ratio": -0.4962402284145355, "logits/chosen": -1.1044921875, "logits/rejected": -0.9095703363418579, "logps/chosen": -0.727734386920929, "logps/rejected": -1.4142577648162842, "loss": 1.0252, "nll_loss": 0.9566406011581421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07283935695886612, "rewards/margins": 0.0686187744140625, "rewards/rejected": -0.14143066108226776, "step": 2760 }, { "epoch": 0.2103664325042719, "grad_norm": 1.5359486635362785, "learning_rate": 1.5200228005130127e-06, "log_odds_chosen": 1.32159423828125, "log_odds_ratio": -0.40288084745407104, "logits/chosen": -1.041406273841858, "logits/rejected": -0.8714843988418579, "logps/chosen": -0.6968749761581421, "logps/rejected": -1.6474609375, "loss": 1.0046, "nll_loss": 0.9417968988418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06967773288488388, "rewards/margins": 0.09504242241382599, "rewards/rejected": -0.16484375298023224, "step": 2770 }, { "epoch": 0.21112587810898045, "grad_norm": 1.4194506654462857, "learning_rate": 1.5172864836217631e-06, "log_odds_chosen": 1.0873534679412842, "log_odds_ratio": -0.463623046875, "logits/chosen": -1.0164062976837158, "logits/rejected": -0.888867199420929, "logps/chosen": -0.7197265625, "logps/rejected": -1.4744141101837158, "loss": 1.0366, "nll_loss": 1.0617187023162842, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07199706882238388, "rewards/margins": 0.07560577243566513, "rewards/rejected": -0.14750976860523224, "step": 2780 }, { "epoch": 0.211885323713689, "grad_norm": 1.3784726985198144, "learning_rate": 1.514564891325506e-06, "log_odds_chosen": 1.029394507408142, "log_odds_ratio": -0.5015624761581421, "logits/chosen": -0.974804699420929, "logits/rejected": -0.8804687261581421, "logps/chosen": -0.757617175579071, "logps/rejected": -1.482421875, "loss": 1.0259, "nll_loss": 1.0125000476837158, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07573242485523224, "rewards/margins": 0.072662353515625, "rewards/rejected": -0.14838866889476776, "step": 2790 }, { "epoch": 0.21264476931839757, "grad_norm": 1.325776018316159, "learning_rate": 1.5118578920369086e-06, "log_odds_chosen": 1.0435059070587158, "log_odds_ratio": -0.44306641817092896, "logits/chosen": -1.2003905773162842, "logits/rejected": -0.9730468988418579, "logps/chosen": -0.7281249761581421, "logps/rejected": -1.447656273841858, "loss": 1.0231, "nll_loss": 0.9535156488418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07281494140625, "rewards/margins": 0.0719451904296875, "rewards/rejected": -0.14467772841453552, "step": 2800 }, { "epoch": 0.21340421492310613, "grad_norm": 1.409620248313833, "learning_rate": 1.5091653558090898e-06, "log_odds_chosen": 1.023193359375, "log_odds_ratio": -0.5077148675918579, "logits/chosen": -1.038671851158142, "logits/rejected": -0.9326171875, "logps/chosen": -0.75341796875, "logps/rejected": -1.4894530773162842, "loss": 1.0056, "nll_loss": 0.9837890863418579, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.0753173828125, "rewards/margins": 0.07373657077550888, "rewards/rejected": -0.14897461235523224, "step": 2810 }, { "epoch": 0.2141636605278147, "grad_norm": 1.2883531174078497, "learning_rate": 1.506487154309419e-06, "log_odds_chosen": 1.1072876453399658, "log_odds_ratio": -0.45849609375, "logits/chosen": -1.0847656726837158, "logits/rejected": -0.876171886920929, "logps/chosen": -0.759960949420929, "logps/rejected": -1.548437476158142, "loss": 1.0212, "nll_loss": 1.017578125, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07600097358226776, "rewards/margins": 0.07876892387866974, "rewards/rejected": -0.15483398735523224, "step": 2820 }, { "epoch": 0.21492310613252325, "grad_norm": 1.5393091001244983, "learning_rate": 1.5038231607938247e-06, "log_odds_chosen": 1.0544922351837158, "log_odds_ratio": -0.4718261659145355, "logits/chosen": -1.1173827648162842, "logits/rejected": -0.982421875, "logps/chosen": -0.703808605670929, "logps/rejected": -1.436914086341858, "loss": 1.0159, "nll_loss": 0.9869140386581421, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07039795070886612, "rewards/margins": 0.07325439155101776, "rewards/rejected": -0.14365234971046448, "step": 2830 }, { "epoch": 0.21568255173723183, "grad_norm": 1.3122624094482356, "learning_rate": 1.501173250081603e-06, "log_odds_chosen": 0.9027099609375, "log_odds_ratio": -0.544238269329071, "logits/chosen": -1.0685546398162842, "logits/rejected": -0.9330078363418579, "logps/chosen": -0.80078125, "logps/rejected": -1.456445336341858, "loss": 1.0034, "nll_loss": 0.9619140625, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08017577975988388, "rewards/margins": 0.06553955376148224, "rewards/rejected": -0.14560547471046448, "step": 2840 }, { "epoch": 0.2164419973419404, "grad_norm": 1.5487961558686079, "learning_rate": 1.4985372985307103e-06, "log_odds_chosen": 1.0182616710662842, "log_odds_ratio": -0.4805664122104645, "logits/chosen": -1.192773461341858, "logits/rejected": -1.038476586341858, "logps/chosen": -0.7503906488418579, "logps/rejected": -1.461523413658142, "loss": 1.0073, "nll_loss": 0.92578125, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07510986179113388, "rewards/margins": 0.07102050632238388, "rewards/rejected": -0.14614257216453552, "step": 2850 }, { "epoch": 0.21720144294664895, "grad_norm": 1.3967284403364015, "learning_rate": 1.4959151840135313e-06, "log_odds_chosen": 1.18212890625, "log_odds_ratio": -0.4404296875, "logits/chosen": -1.133203148841858, "logits/rejected": -0.9740234613418579, "logps/chosen": -0.735156238079071, "logps/rejected": -1.5548827648162842, "loss": 1.0132, "nll_loss": 0.9712890386581421, "rewards/accuracies": 0.75, "rewards/chosen": -0.07354736328125, "rewards/margins": 0.082000732421875, "rewards/rejected": -0.15556640923023224, "step": 2860 }, { "epoch": 0.2179608885513575, "grad_norm": 1.4489280656636392, "learning_rate": 1.4933067858931148e-06, "log_odds_chosen": 1.1765625476837158, "log_odds_ratio": -0.46049803495407104, "logits/chosen": -1.0869140625, "logits/rejected": -0.9423828125, "logps/chosen": -0.711230456829071, "logps/rejected": -1.5451171398162842, "loss": 1.0255, "nll_loss": 0.9761718511581421, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07110595703125, "rewards/margins": 0.0833740234375, "rewards/rejected": -0.15444335341453552, "step": 2870 }, { "epoch": 0.21872033415606607, "grad_norm": 1.4044101078587465, "learning_rate": 1.4907119849998597e-06, "log_odds_chosen": 1.221093773841858, "log_odds_ratio": -0.45917969942092896, "logits/chosen": -1.125, "logits/rejected": -0.882617175579071, "logps/chosen": -0.773144543170929, "logps/rejected": -1.681249976158142, "loss": 1.0181, "nll_loss": 0.9365234375, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07740478217601776, "rewards/margins": 0.09089966118335724, "rewards/rejected": -0.16816405951976776, "step": 2880 }, { "epoch": 0.21947977976077462, "grad_norm": 1.4417634798081698, "learning_rate": 1.488130663608649e-06, "log_odds_chosen": 1.1541259288787842, "log_odds_ratio": -0.4814453125, "logits/chosen": -1.111718773841858, "logits/rejected": -0.943554699420929, "logps/chosen": -0.76708984375, "logps/rejected": -1.5964844226837158, "loss": 0.9934, "nll_loss": 0.9957031011581421, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07664795219898224, "rewards/margins": 0.08302612602710724, "rewards/rejected": -0.15957030653953552, "step": 2890 }, { "epoch": 0.2202392253654832, "grad_norm": 1.6423248757055762, "learning_rate": 1.4855627054164149e-06, "log_odds_chosen": 0.9319823980331421, "log_odds_ratio": -0.514599621295929, "logits/chosen": -1.0685546398162842, "logits/rejected": -0.9720703363418579, "logps/chosen": -0.696093738079071, "logps/rejected": -1.3273437023162842, "loss": 1.0056, "nll_loss": 0.9517577886581421, "rewards/accuracies": 0.71875, "rewards/chosen": -0.06960449367761612, "rewards/margins": 0.06303863227367401, "rewards/rejected": -0.1326904296875, "step": 2900 }, { "epoch": 0.22099867097019177, "grad_norm": 1.8765194285417888, "learning_rate": 1.4830079955201294e-06, "log_odds_chosen": 1.263769507408142, "log_odds_ratio": -0.42255860567092896, "logits/chosen": -1.077734351158142, "logits/rejected": -0.9447265863418579, "logps/chosen": -0.6748046875, "logps/rejected": -1.5419921875, "loss": 1.021, "nll_loss": 0.9462890625, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06756591796875, "rewards/margins": 0.08659972995519638, "rewards/rejected": -0.15415039658546448, "step": 2910 }, { "epoch": 0.22175811657490033, "grad_norm": 1.486333305692402, "learning_rate": 1.4804664203952103e-06, "log_odds_chosen": 1.2390625476837158, "log_odds_ratio": -0.4326171875, "logits/chosen": -1.149804711341858, "logits/rejected": -0.9312499761581421, "logps/chosen": -0.7261718511581421, "logps/rejected": -1.60546875, "loss": 1.0041, "nll_loss": 0.9468749761581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07255859673023224, "rewards/margins": 0.087982177734375, "rewards/rejected": -0.16049805283546448, "step": 2920 }, { "epoch": 0.22251756217960889, "grad_norm": 1.986835983141858, "learning_rate": 1.4779378678743327e-06, "log_odds_chosen": 1.181640625, "log_odds_ratio": -0.46577149629592896, "logits/chosen": -1.1095702648162842, "logits/rejected": -0.9140625, "logps/chosen": -0.7421875, "logps/rejected": -1.58984375, "loss": 1.023, "nll_loss": 0.9720703363418579, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07421875, "rewards/margins": 0.08469848334789276, "rewards/rejected": -0.158935546875, "step": 2930 }, { "epoch": 0.22327700778431744, "grad_norm": 1.3921219178144844, "learning_rate": 1.4754222271266348e-06, "log_odds_chosen": 1.1930663585662842, "log_odds_ratio": -0.439697265625, "logits/chosen": -1.119726538658142, "logits/rejected": -0.949999988079071, "logps/chosen": -0.7437499761581421, "logps/rejected": -1.573828101158142, "loss": 1.001, "nll_loss": 0.9742187261581421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07438965141773224, "rewards/margins": 0.08299560844898224, "rewards/rejected": -0.1573486328125, "step": 2940 }, { "epoch": 0.224036453389026, "grad_norm": 1.5173171215181578, "learning_rate": 1.4729193886373175e-06, "log_odds_chosen": 1.192529320716858, "log_odds_ratio": -0.459716796875, "logits/chosen": -1.1814453601837158, "logits/rejected": -1.008398413658142, "logps/chosen": -0.7269531488418579, "logps/rejected": -1.549218773841858, "loss": 0.9945, "nll_loss": 0.911328136920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07268066704273224, "rewards/margins": 0.08225402981042862, "rewards/rejected": -0.15498046576976776, "step": 2950 }, { "epoch": 0.22479589899373456, "grad_norm": 1.5581049258603732, "learning_rate": 1.4704292441876156e-06, "log_odds_chosen": 1.1156737804412842, "log_odds_ratio": -0.47314453125, "logits/chosen": -1.092382788658142, "logits/rejected": -0.9488281011581421, "logps/chosen": -0.735156238079071, "logps/rejected": -1.49609375, "loss": 1.0217, "nll_loss": 0.9341796636581421, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07347412407398224, "rewards/margins": 0.07610626518726349, "rewards/rejected": -0.14963379502296448, "step": 2960 }, { "epoch": 0.22555534459844315, "grad_norm": 1.3006837645901217, "learning_rate": 1.4679516868351474e-06, "log_odds_chosen": 0.9906250238418579, "log_odds_ratio": -0.4986328184604645, "logits/chosen": -1.0496094226837158, "logits/rejected": -0.9048827886581421, "logps/chosen": -0.721875011920929, "logps/rejected": -1.445898413658142, "loss": 1.0083, "nll_loss": 0.9931640625, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07218017429113388, "rewards/margins": 0.07235107570886612, "rewards/rejected": -0.14462891221046448, "step": 2970 }, { "epoch": 0.2263147902031517, "grad_norm": 1.80427726392399, "learning_rate": 1.4654866108946234e-06, "log_odds_chosen": 1.1192505359649658, "log_odds_ratio": -0.5210937261581421, "logits/chosen": -0.989062488079071, "logits/rejected": -0.9046875238418579, "logps/chosen": -0.7374023199081421, "logps/rejected": -1.5109374523162842, "loss": 1.0193, "nll_loss": 0.9501953125, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07369384914636612, "rewards/margins": 0.07729797065258026, "rewards/rejected": -0.15114745497703552, "step": 2980 }, { "epoch": 0.22707423580786026, "grad_norm": 1.594990250944197, "learning_rate": 1.4630339119189101e-06, "log_odds_chosen": 1.170507788658142, "log_odds_ratio": -0.45170897245407104, "logits/chosen": -1.078710913658142, "logits/rejected": -0.927734375, "logps/chosen": -0.7255859375, "logps/rejected": -1.5281250476837158, "loss": 0.9854, "nll_loss": 0.9505859613418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07254638522863388, "rewards/margins": 0.08019409328699112, "rewards/rejected": -0.15273436903953552, "step": 2990 }, { "epoch": 0.22783368141256882, "grad_norm": 1.4183200247835324, "learning_rate": 1.4605934866804429e-06, "log_odds_chosen": 1.222631812095642, "log_odds_ratio": -0.43867188692092896, "logits/chosen": -1.0910155773162842, "logits/rejected": -0.9488281011581421, "logps/chosen": -0.7173827886581421, "logps/rejected": -1.582421898841858, "loss": 1.0285, "nll_loss": 0.984570324420929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07174072414636612, "rewards/margins": 0.08642272651195526, "rewards/rejected": -0.15827636420726776, "step": 3000 }, { "epoch": 0.22859312701727738, "grad_norm": 1.426521872826207, "learning_rate": 1.4581652331529784e-06, "log_odds_chosen": 1.0943481922149658, "log_odds_ratio": -0.47919923067092896, "logits/chosen": -1.119140625, "logits/rejected": -0.951953113079071, "logps/chosen": -0.756054699420929, "logps/rejected": -1.538671851158142, "loss": 0.9949, "nll_loss": 0.935742199420929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07564697414636612, "rewards/margins": 0.07832641899585724, "rewards/rejected": -0.15378418564796448, "step": 3010 }, { "epoch": 0.22935257262198594, "grad_norm": 1.5509852816434153, "learning_rate": 1.4557490504936778e-06, "log_odds_chosen": 1.0164062976837158, "log_odds_ratio": -0.45551759004592896, "logits/chosen": -1.1560547351837158, "logits/rejected": -0.9564453363418579, "logps/chosen": -0.689453125, "logps/rejected": -1.3976562023162842, "loss": 0.9909, "nll_loss": 0.875195324420929, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.06898193061351776, "rewards/margins": 0.07075195014476776, "rewards/rejected": -0.1397705078125, "step": 3020 }, { "epoch": 0.23011201822669453, "grad_norm": 2.0419378712604734, "learning_rate": 1.453344839025519e-06, "log_odds_chosen": 1.1350586414337158, "log_odds_ratio": -0.45166015625, "logits/chosen": -1.174414038658142, "logits/rejected": -0.9781249761581421, "logps/chosen": -0.694140613079071, "logps/rejected": -1.479882836341858, "loss": 0.9791, "nll_loss": 0.875781238079071, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06948242336511612, "rewards/margins": 0.07857666164636612, "rewards/rejected": -0.14802245795726776, "step": 3030 }, { "epoch": 0.23087146383140308, "grad_norm": 1.2587224800774308, "learning_rate": 1.4509525002200234e-06, "log_odds_chosen": 0.973400890827179, "log_odds_ratio": -0.4901367127895355, "logits/chosen": -1.0876953601837158, "logits/rejected": -0.9996093511581421, "logps/chosen": -0.67626953125, "logps/rejected": -1.337890625, "loss": 0.9804, "nll_loss": 0.932421863079071, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.06757812201976776, "rewards/margins": 0.06608734279870987, "rewards/rejected": -0.13364258408546448, "step": 3040 }, { "epoch": 0.23163090943611164, "grad_norm": 1.6945830960144697, "learning_rate": 1.4485719366802965e-06, "log_odds_chosen": 1.135156273841858, "log_odds_ratio": -0.4906249940395355, "logits/chosen": -1.0986328125, "logits/rejected": -0.943164050579071, "logps/chosen": -0.771484375, "logps/rejected": -1.610937476158142, "loss": 1.0149, "nll_loss": 0.955859363079071, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07711181789636612, "rewards/margins": 0.08395843207836151, "rewards/rejected": -0.16105957329273224, "step": 3050 }, { "epoch": 0.2323903550408202, "grad_norm": 1.362328199361553, "learning_rate": 1.4462030521243742e-06, "log_odds_chosen": 1.026757836341858, "log_odds_ratio": -0.502636730670929, "logits/chosen": -1.1875, "logits/rejected": -0.9925781488418579, "logps/chosen": -0.8115234375, "logps/rejected": -1.571874976158142, "loss": 1.0073, "nll_loss": 0.9732421636581421, "rewards/accuracies": 0.71875, "rewards/chosen": -0.08115234225988388, "rewards/margins": 0.07603149116039276, "rewards/rejected": -0.15725097060203552, "step": 3060 }, { "epoch": 0.23314980064552876, "grad_norm": 1.3199561817325909, "learning_rate": 1.443845751368867e-06, "log_odds_chosen": 1.160375952720642, "log_odds_ratio": -0.45917969942092896, "logits/chosen": -1.1085937023162842, "logits/rejected": -0.970507800579071, "logps/chosen": -0.728515625, "logps/rejected": -1.5294921398162842, "loss": 1.0077, "nll_loss": 0.9791015386581421, "rewards/accuracies": 0.75, "rewards/chosen": -0.07285156100988388, "rewards/margins": 0.080047607421875, "rewards/rejected": -0.15292969346046448, "step": 3070 }, { "epoch": 0.23390924625023732, "grad_norm": 1.4503290665923505, "learning_rate": 1.4414999403128943e-06, "log_odds_chosen": 1.330078125, "log_odds_ratio": -0.4249511659145355, "logits/chosen": -1.1150391101837158, "logits/rejected": -0.945507824420929, "logps/chosen": -0.7564452886581421, "logps/rejected": -1.7117187976837158, "loss": 1.0044, "nll_loss": 1.025390625, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07562255859375, "rewards/margins": 0.09538574516773224, "rewards/rejected": -0.17092284560203552, "step": 3080 }, { "epoch": 0.23466869185494588, "grad_norm": 1.3259629321562467, "learning_rate": 1.439165525922309e-06, "log_odds_chosen": 1.079003930091858, "log_odds_ratio": -0.48334962129592896, "logits/chosen": -1.1337890625, "logits/rejected": -0.955078125, "logps/chosen": -0.7328125238418579, "logps/rejected": -1.4697265625, "loss": 1.0027, "nll_loss": 0.9916015863418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07331542670726776, "rewards/margins": 0.07365722954273224, "rewards/rejected": -0.14702148735523224, "step": 3090 }, { "epoch": 0.23542813745965446, "grad_norm": 1.3768035323059686, "learning_rate": 1.4368424162141992e-06, "log_odds_chosen": 1.143396019935608, "log_odds_ratio": -0.4339355528354645, "logits/chosen": -1.142187476158142, "logits/rejected": -0.9916015863418579, "logps/chosen": -0.7183593511581421, "logps/rejected": -1.5226562023162842, "loss": 0.9737, "nll_loss": 0.87109375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07181396335363388, "rewards/margins": 0.08037109673023224, "rewards/rejected": -0.15217284858226776, "step": 3100 }, { "epoch": 0.23618758306436302, "grad_norm": 1.685616488519654, "learning_rate": 1.434530520241665e-06, "log_odds_chosen": 1.2224853038787842, "log_odds_ratio": -0.46577149629592896, "logits/chosen": -1.101171851158142, "logits/rejected": -0.9144531488418579, "logps/chosen": -0.7611328363418579, "logps/rejected": -1.6349608898162842, "loss": 0.9862, "nll_loss": 1.0222656726837158, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07614745944738388, "rewards/margins": 0.08722534030675888, "rewards/rejected": -0.16335448622703552, "step": 3110 }, { "epoch": 0.23694702866907158, "grad_norm": 1.64533521257353, "learning_rate": 1.4322297480788657e-06, "log_odds_chosen": 0.994335949420929, "log_odds_ratio": -0.550976574420929, "logits/chosen": -1.1591796875, "logits/rejected": -1.034570336341858, "logps/chosen": -0.7802734375, "logps/rejected": -1.478515625, "loss": 1.0022, "nll_loss": 0.9195312261581421, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0780029296875, "rewards/margins": 0.06981811672449112, "rewards/rejected": -0.14777831733226776, "step": 3120 }, { "epoch": 0.23770647427378014, "grad_norm": 1.447635916124419, "learning_rate": 1.4299400108063247e-06, "log_odds_chosen": 1.080175757408142, "log_odds_ratio": -0.45439451932907104, "logits/chosen": -1.085351586341858, "logits/rejected": -0.916015625, "logps/chosen": -0.728710949420929, "logps/rejected": -1.443359375, "loss": 0.9713, "nll_loss": 0.9292968511581421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07281494140625, "rewards/margins": 0.07155456393957138, "rewards/rejected": -0.14438477158546448, "step": 3130 }, { "epoch": 0.2384659198784887, "grad_norm": 1.4846906260149713, "learning_rate": 1.4276612204964992e-06, "log_odds_chosen": 1.2802734375, "log_odds_ratio": -0.4156250059604645, "logits/chosen": -1.1779296398162842, "logits/rejected": -0.975781261920929, "logps/chosen": -0.72314453125, "logps/rejected": -1.622656226158142, "loss": 1.0183, "nll_loss": 0.8988281488418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07236327975988388, "rewards/margins": 0.08985595405101776, "rewards/rejected": -0.16225585341453552, "step": 3140 }, { "epoch": 0.23922536548319726, "grad_norm": 1.5507759491258146, "learning_rate": 1.4253932901995967e-06, "log_odds_chosen": 1.205175757408142, "log_odds_ratio": -0.439208984375, "logits/chosen": -1.096093773841858, "logits/rejected": -0.906054675579071, "logps/chosen": -0.7681640386581421, "logps/rejected": -1.607421875, "loss": 0.987, "nll_loss": 0.89501953125, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07680664211511612, "rewards/margins": 0.08388672024011612, "rewards/rejected": -0.16059570014476776, "step": 3150 }, { "epoch": 0.23998481108790584, "grad_norm": 1.3657394686781164, "learning_rate": 1.42313613392964e-06, "log_odds_chosen": 1.113623023033142, "log_odds_ratio": -0.463623046875, "logits/chosen": -1.0216796398162842, "logits/rejected": -0.895703136920929, "logps/chosen": -0.74658203125, "logps/rejected": -1.4835937023162842, "loss": 0.9889, "nll_loss": 0.9068359136581421, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07460937649011612, "rewards/margins": 0.07366943359375, "rewards/rejected": -0.14836426079273224, "step": 3160 }, { "epoch": 0.2407442566926144, "grad_norm": 1.4430728557735113, "learning_rate": 1.4208896666507756e-06, "log_odds_chosen": 1.0197265148162842, "log_odds_ratio": -0.4845214784145355, "logits/chosen": -1.1521484851837158, "logits/rejected": -0.895312488079071, "logps/chosen": -0.74462890625, "logps/rejected": -1.4636719226837158, "loss": 0.9958, "nll_loss": 0.984179675579071, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.07438965141773224, "rewards/margins": 0.07203368842601776, "rewards/rejected": -0.14643554389476776, "step": 3170 }, { "epoch": 0.24150370229732296, "grad_norm": 1.5111052572128698, "learning_rate": 1.4186538042638173e-06, "log_odds_chosen": 1.1962890625, "log_odds_ratio": -0.45781248807907104, "logits/chosen": -1.121484398841858, "logits/rejected": -0.9468749761581421, "logps/chosen": -0.763671875, "logps/rejected": -1.615625023841858, "loss": 0.9916, "nll_loss": 0.903027355670929, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07635498046875, "rewards/margins": 0.08502807468175888, "rewards/rejected": -0.1614990234375, "step": 3180 }, { "epoch": 0.24226314790203152, "grad_norm": 1.38464974601922, "learning_rate": 1.416428463593022e-06, "log_odds_chosen": 1.275390625, "log_odds_ratio": -0.46918946504592896, "logits/chosen": -1.0078125, "logits/rejected": -0.8812500238418579, "logps/chosen": -0.7718750238418579, "logps/rejected": -1.689453125, "loss": 1.0024, "nll_loss": 1.0126953125, "rewards/accuracies": 0.75, "rewards/chosen": -0.07716064155101776, "rewards/margins": 0.091796875, "rewards/rejected": -0.16889648139476776, "step": 3190 }, { "epoch": 0.24302259350674008, "grad_norm": 1.238688867540669, "learning_rate": 1.414213562373095e-06, "log_odds_chosen": 1.086035132408142, "log_odds_ratio": -0.4755859375, "logits/chosen": -1.044921875, "logits/rejected": -0.938281238079071, "logps/chosen": -0.6963866949081421, "logps/rejected": -1.447851538658142, "loss": 0.9867, "nll_loss": 0.940625011920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.0697021484375, "rewards/margins": 0.07515869289636612, "rewards/rejected": -0.144775390625, "step": 3200 }, { "epoch": 0.24378203911144863, "grad_norm": 1.3350118625263159, "learning_rate": 1.4120090192364154e-06, "log_odds_chosen": 1.128320336341858, "log_odds_ratio": -0.44306641817092896, "logits/chosen": -1.121679663658142, "logits/rejected": -0.9482421875, "logps/chosen": -0.729296863079071, "logps/rejected": -1.4892578125, "loss": 1.0237, "nll_loss": 0.969921886920929, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07293701171875, "rewards/margins": 0.0760498046875, "rewards/rejected": -0.14899902045726776, "step": 3210 }, { "epoch": 0.2445414847161572, "grad_norm": 1.5017530586631078, "learning_rate": 1.4098147537004828e-06, "log_odds_chosen": 1.32666015625, "log_odds_ratio": -0.42919921875, "logits/chosen": -1.2099609375, "logits/rejected": -1.0302734375, "logps/chosen": -0.6934570074081421, "logps/rejected": -1.6044921875, "loss": 0.9781, "nll_loss": 0.9072265625, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06932373344898224, "rewards/margins": 0.09115906059741974, "rewards/rejected": -0.16054686903953552, "step": 3220 }, { "epoch": 0.24530093032086578, "grad_norm": 1.5841796298302908, "learning_rate": 1.4076306861555735e-06, "log_odds_chosen": 1.026269555091858, "log_odds_ratio": -0.49687498807907104, "logits/chosen": -1.0322265625, "logits/rejected": -0.899609386920929, "logps/chosen": -0.7203124761581421, "logps/rejected": -1.422460913658142, "loss": 0.9855, "nll_loss": 0.9437500238418579, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.072021484375, "rewards/margins": 0.070159912109375, "rewards/rejected": -0.14216308295726776, "step": 3230 }, { "epoch": 0.24606037592557434, "grad_norm": 1.6866548923004456, "learning_rate": 1.405456737852613e-06, "log_odds_chosen": 1.2684326171875, "log_odds_ratio": -0.4193359315395355, "logits/chosen": -1.1638672351837158, "logits/rejected": -0.9623047113418579, "logps/chosen": -0.6836913824081421, "logps/rejected": -1.5373046398162842, "loss": 0.9928, "nll_loss": 0.9482421875, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06843261420726776, "rewards/margins": 0.08524779975414276, "rewards/rejected": -0.15375976264476776, "step": 3240 }, { "epoch": 0.2468198215302829, "grad_norm": 1.4973200020355129, "learning_rate": 1.4032928308912468e-06, "log_odds_chosen": 1.375390648841858, "log_odds_ratio": -0.41987305879592896, "logits/chosen": -1.126953125, "logits/rejected": -0.8861328363418579, "logps/chosen": -0.687792956829071, "logps/rejected": -1.6789062023162842, "loss": 1.0104, "nll_loss": 0.9906250238418579, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06873778998851776, "rewards/margins": 0.09921874850988388, "rewards/rejected": -0.16801758110523224, "step": 3250 }, { "epoch": 0.24757926713499145, "grad_norm": 1.3981873710728605, "learning_rate": 1.4011388882081175e-06, "log_odds_chosen": 1.229516625404358, "log_odds_ratio": -0.41547852754592896, "logits/chosen": -1.129296898841858, "logits/rejected": -0.9195312261581421, "logps/chosen": -0.6949218511581421, "logps/rejected": -1.552343726158142, "loss": 0.9982, "nll_loss": 0.9546874761581421, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06953124701976776, "rewards/margins": 0.08570251613855362, "rewards/rejected": -0.15522460639476776, "step": 3260 }, { "epoch": 0.2483387127397, "grad_norm": 1.3134383139639862, "learning_rate": 1.3989948335653378e-06, "log_odds_chosen": 0.867419421672821, "log_odds_ratio": -0.539794921875, "logits/chosen": -1.117578148841858, "logits/rejected": -1.034765601158142, "logps/chosen": -0.7529296875, "logps/rejected": -1.3076171875, "loss": 0.9712, "nll_loss": 0.903515636920929, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07523193210363388, "rewards/margins": 0.05535278469324112, "rewards/rejected": -0.130615234375, "step": 3270 }, { "epoch": 0.24909815834440857, "grad_norm": 1.9092317116600896, "learning_rate": 1.3968605915391564e-06, "log_odds_chosen": 1.02587890625, "log_odds_ratio": -0.50244140625, "logits/chosen": -1.1076171398162842, "logits/rejected": -0.922070324420929, "logps/chosen": -0.7876952886581421, "logps/rejected": -1.513281226158142, "loss": 0.9893, "nll_loss": 1.0146484375, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07878418266773224, "rewards/margins": 0.07256469875574112, "rewards/rejected": -0.15129394829273224, "step": 3280 }, { "epoch": 0.24985760394911716, "grad_norm": 6.275836313429705, "learning_rate": 1.3947360875088132e-06, "log_odds_chosen": 1.181494116783142, "log_odds_ratio": -0.489013671875, "logits/chosen": -1.0798828601837158, "logits/rejected": -0.8851562738418579, "logps/chosen": -0.754687488079071, "logps/rejected": -1.6179687976837158, "loss": 1.004, "nll_loss": 1.0499999523162842, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07552490383386612, "rewards/margins": 0.08638305962085724, "rewards/rejected": -0.16181640326976776, "step": 3290 }, { "epoch": 0.2506170495538257, "grad_norm": 1.2206969523484765, "learning_rate": 1.3926212476455828e-06, "log_odds_chosen": 1.033300757408142, "log_odds_ratio": -0.4737304747104645, "logits/chosen": -1.0632812976837158, "logits/rejected": -0.923828125, "logps/chosen": -0.7373046875, "logps/rejected": -1.4675781726837158, "loss": 0.9827, "nll_loss": 0.9447265863418579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07375488430261612, "rewards/margins": 0.0730438232421875, "rewards/rejected": -0.14677734673023224, "step": 3300 }, { "epoch": 0.2513764951585343, "grad_norm": 1.7854229136806232, "learning_rate": 1.3905159989019964e-06, "log_odds_chosen": 1.264550805091858, "log_odds_ratio": -0.405517578125, "logits/chosen": -1.0919921398162842, "logits/rejected": -0.900195300579071, "logps/chosen": -0.763476550579071, "logps/rejected": -1.656640648841858, "loss": 1.0089, "nll_loss": 0.9722656011581421, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07637939602136612, "rewards/margins": 0.08925171196460724, "rewards/rejected": -0.16574707627296448, "step": 3310 }, { "epoch": 0.2521359407632428, "grad_norm": 1.4125667172783436, "learning_rate": 1.3884202690012465e-06, "log_odds_chosen": 1.0185546875, "log_odds_ratio": -0.4647460877895355, "logits/chosen": -1.081640601158142, "logits/rejected": -0.900585949420929, "logps/chosen": -0.7626953125, "logps/rejected": -1.46484375, "loss": 1.0323, "nll_loss": 1.0050780773162842, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07624511420726776, "rewards/margins": 0.07021484524011612, "rewards/rejected": -0.1466064453125, "step": 3320 }, { "epoch": 0.2528953863679514, "grad_norm": 1.4299616174033238, "learning_rate": 1.3863339864267636e-06, "log_odds_chosen": 1.33447265625, "log_odds_ratio": -0.4285644590854645, "logits/chosen": -1.112695336341858, "logits/rejected": -0.9283202886581421, "logps/chosen": -0.709765613079071, "logps/rejected": -1.665429711341858, "loss": 1.015, "nll_loss": 1.0001952648162842, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07094726711511612, "rewards/margins": 0.09550094604492188, "rewards/rejected": -0.16652831435203552, "step": 3330 }, { "epoch": 0.25365483197266, "grad_norm": 1.7250129234684053, "learning_rate": 1.3842570804119655e-06, "log_odds_chosen": 0.984912097454071, "log_odds_ratio": -0.511279284954071, "logits/chosen": -1.1541016101837158, "logits/rejected": -0.9603515863418579, "logps/chosen": -0.777148425579071, "logps/rejected": -1.486718773841858, "loss": 1.0112, "nll_loss": 1.042578101158142, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07768554985523224, "rewards/margins": 0.07109527289867401, "rewards/rejected": -0.14882811903953552, "step": 3340 }, { "epoch": 0.2544142775773685, "grad_norm": 1.466103548897273, "learning_rate": 1.3821894809301763e-06, "log_odds_chosen": 1.208837866783142, "log_odds_ratio": -0.45654296875, "logits/chosen": -1.043554663658142, "logits/rejected": -0.875195324420929, "logps/chosen": -0.735546886920929, "logps/rejected": -1.583398461341858, "loss": 0.9851, "nll_loss": 0.9404296875, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07359619438648224, "rewards/margins": 0.08479614555835724, "rewards/rejected": -0.15837402641773224, "step": 3350 }, { "epoch": 0.2551737231820771, "grad_norm": 1.3140497620065643, "learning_rate": 1.3801311186847081e-06, "log_odds_chosen": 1.1935546398162842, "log_odds_ratio": -0.44404298067092896, "logits/chosen": -1.079687476158142, "logits/rejected": -0.93359375, "logps/chosen": -0.6802734136581421, "logps/rejected": -1.4912109375, "loss": 0.9726, "nll_loss": 0.969921886920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06802978366613388, "rewards/margins": 0.08100585639476776, "rewards/rejected": -0.14899902045726776, "step": 3360 }, { "epoch": 0.2559331687867856, "grad_norm": 1.2593397588267254, "learning_rate": 1.378081925099109e-06, "log_odds_chosen": 1.405664086341858, "log_odds_ratio": -0.40019530057907104, "logits/chosen": -1.1150391101837158, "logits/rejected": -0.9517577886581421, "logps/chosen": -0.697265625, "logps/rejected": -1.7175781726837158, "loss": 0.9888, "nll_loss": 0.989453136920929, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06965331733226776, "rewards/margins": 0.1019287109375, "rewards/rejected": -0.17177733778953552, "step": 3370 }, { "epoch": 0.2566926143914942, "grad_norm": 1.600492782417642, "learning_rate": 1.376041832307563e-06, "log_odds_chosen": 1.143945336341858, "log_odds_ratio": -0.4590820372104645, "logits/chosen": -1.106835961341858, "logits/rejected": -0.973437488079071, "logps/chosen": -0.719921886920929, "logps/rejected": -1.5519530773162842, "loss": 0.9983, "nll_loss": 0.994335949420929, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07200928032398224, "rewards/margins": 0.08329467475414276, "rewards/rejected": -0.15517577528953552, "step": 3380 }, { "epoch": 0.2574520599962028, "grad_norm": 1.6354013390479716, "learning_rate": 1.3740107731454524e-06, "log_odds_chosen": 1.120849609375, "log_odds_ratio": -0.452880859375, "logits/chosen": -1.1414062976837158, "logits/rejected": -0.9574218988418579, "logps/chosen": -0.7294921875, "logps/rejected": -1.520898461341858, "loss": 0.9997, "nll_loss": 0.9007812738418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07298584282398224, "rewards/margins": 0.079254150390625, "rewards/rejected": -0.152099609375, "step": 3390 }, { "epoch": 0.25821150560091133, "grad_norm": 1.3711240065430998, "learning_rate": 1.3719886811400705e-06, "log_odds_chosen": 1.07421875, "log_odds_ratio": -0.4869140684604645, "logits/chosen": -1.146093726158142, "logits/rejected": -1.0085937976837158, "logps/chosen": -0.7210937738418579, "logps/rejected": -1.4900391101837158, "loss": 0.9925, "nll_loss": 0.9585937261581421, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07209472358226776, "rewards/margins": 0.0768585205078125, "rewards/rejected": -0.14902344346046448, "step": 3400 }, { "epoch": 0.2589709512056199, "grad_norm": 1.587675641240288, "learning_rate": 1.3699754905014834e-06, "log_odds_chosen": 1.3369140625, "log_odds_ratio": -0.40727537870407104, "logits/chosen": -1.083593726158142, "logits/rejected": -0.9388672113418579, "logps/chosen": -0.695117175579071, "logps/rejected": -1.6628906726837158, "loss": 0.9592, "nll_loss": 0.9632812738418579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06949462741613388, "rewards/margins": 0.09668578952550888, "rewards/rejected": -0.16604003310203552, "step": 3410 }, { "epoch": 0.25973039681032845, "grad_norm": 1.379237707456793, "learning_rate": 1.3679711361135388e-06, "log_odds_chosen": 1.159765601158142, "log_odds_ratio": -0.4771972596645355, "logits/chosen": -1.141992211341858, "logits/rejected": -0.974804699420929, "logps/chosen": -0.7470703125, "logps/rejected": -1.560156226158142, "loss": 0.9875, "nll_loss": 0.96484375, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07462158054113388, "rewards/margins": 0.08114013820886612, "rewards/rejected": -0.15581054985523224, "step": 3420 }, { "epoch": 0.26048984241503703, "grad_norm": 1.4371825913947844, "learning_rate": 1.3659755535250212e-06, "log_odds_chosen": 1.2651488780975342, "log_odds_ratio": -0.44428712129592896, "logits/chosen": -1.1708984375, "logits/rejected": -1.007226586341858, "logps/chosen": -0.7431640625, "logps/rejected": -1.6550781726837158, "loss": 0.992, "nll_loss": 0.977343738079071, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.0743408203125, "rewards/margins": 0.09113006293773651, "rewards/rejected": -0.16547851264476776, "step": 3430 }, { "epoch": 0.26124928801974556, "grad_norm": 1.4503276625847412, "learning_rate": 1.3639886789409469e-06, "log_odds_chosen": 1.206640601158142, "log_odds_ratio": -0.4513183534145355, "logits/chosen": -1.1339843273162842, "logits/rejected": -0.976367175579071, "logps/chosen": -0.737500011920929, "logps/rejected": -1.6160156726837158, "loss": 0.9874, "nll_loss": 0.9169921875, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07375488430261612, "rewards/margins": 0.08788452297449112, "rewards/rejected": -0.16162109375, "step": 3440 }, { "epoch": 0.26200873362445415, "grad_norm": 1.4429449757708084, "learning_rate": 1.3620104492139977e-06, "log_odds_chosen": 1.177880883216858, "log_odds_ratio": -0.43427735567092896, "logits/chosen": -1.097070336341858, "logits/rejected": -0.9253906011581421, "logps/chosen": -0.8148437738418579, "logps/rejected": -1.6650390625, "loss": 1.0263, "nll_loss": 1.0294921398162842, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08151855319738388, "rewards/margins": 0.08490905910730362, "rewards/rejected": -0.1663818359375, "step": 3450 }, { "epoch": 0.26276817922916273, "grad_norm": 2.176544676840145, "learning_rate": 1.3600408018360918e-06, "log_odds_chosen": 1.005151391029358, "log_odds_ratio": -0.4850097596645355, "logits/chosen": -1.050195336341858, "logits/rejected": -0.8968750238418579, "logps/chosen": -0.7484375238418579, "logps/rejected": -1.446679711341858, "loss": 0.9847, "nll_loss": 0.924609363079071, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07487793266773224, "rewards/margins": 0.0697174072265625, "rewards/rejected": -0.14443358778953552, "step": 3460 }, { "epoch": 0.26352762483387127, "grad_norm": 1.46553086270503, "learning_rate": 1.3580796749300878e-06, "log_odds_chosen": 1.089514136314392, "log_odds_ratio": -0.44990235567092896, "logits/chosen": -1.127343773841858, "logits/rejected": -0.9468749761581421, "logps/chosen": -0.7392578125, "logps/rejected": -1.48828125, "loss": 1.0087, "nll_loss": 0.925976574420929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.073974609375, "rewards/margins": 0.07484283298254013, "rewards/rejected": -0.148681640625, "step": 3470 }, { "epoch": 0.26428707043857985, "grad_norm": 1.2166774466433257, "learning_rate": 1.3561270072416209e-06, "log_odds_chosen": 1.237402319908142, "log_odds_ratio": -0.4642578065395355, "logits/chosen": -1.1404297351837158, "logits/rejected": -0.9283202886581421, "logps/chosen": -0.7542968988418579, "logps/rejected": -1.658593773841858, "loss": 0.9832, "nll_loss": 0.941210925579071, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07545165717601776, "rewards/margins": 0.09033203125, "rewards/rejected": -0.16579589247703552, "step": 3480 }, { "epoch": 0.2650465160432884, "grad_norm": 1.5323304416293912, "learning_rate": 1.3541827381310652e-06, "log_odds_chosen": 1.2986328601837158, "log_odds_ratio": -0.4231933653354645, "logits/chosen": -1.0857422351837158, "logits/rejected": -0.9447265863418579, "logps/chosen": -0.7113281488418579, "logps/rejected": -1.650390625, "loss": 0.9725, "nll_loss": 0.9585937261581421, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07111816108226776, "rewards/margins": 0.09385986626148224, "rewards/rejected": -0.16511230170726776, "step": 3490 }, { "epoch": 0.26580596164799697, "grad_norm": 1.585044801435611, "learning_rate": 1.3522468075656264e-06, "log_odds_chosen": 1.234765648841858, "log_odds_ratio": -0.4823242127895355, "logits/chosen": -1.1298828125, "logits/rejected": -0.9574218988418579, "logps/chosen": -0.7203124761581421, "logps/rejected": -1.627539038658142, "loss": 0.9866, "nll_loss": 0.949999988079071, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07205810397863388, "rewards/margins": 0.09068603813648224, "rewards/rejected": -0.16264648735523224, "step": 3500 }, { "epoch": 0.2665654072527055, "grad_norm": 1.6088959932298965, "learning_rate": 1.3503191561115553e-06, "log_odds_chosen": 1.3253905773162842, "log_odds_ratio": -0.4601074159145355, "logits/chosen": -1.1572265625, "logits/rejected": -0.9371093511581421, "logps/chosen": -0.7513672113418579, "logps/rejected": -1.7734375, "loss": 1.0053, "nll_loss": 0.9037109613418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07510986179113388, "rewards/margins": 0.10223388671875, "rewards/rejected": -0.17731933295726776, "step": 3510 }, { "epoch": 0.2673248528574141, "grad_norm": 1.3131910901948682, "learning_rate": 1.348399724926484e-06, "log_odds_chosen": 1.2335937023162842, "log_odds_ratio": -0.445556640625, "logits/chosen": -1.1482422351837158, "logits/rejected": -0.953906238079071, "logps/chosen": -0.7328125238418579, "logps/rejected": -1.616796851158142, "loss": 0.9713, "nll_loss": 0.98828125, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07329101860523224, "rewards/margins": 0.08824463188648224, "rewards/rejected": -0.16152343153953552, "step": 3520 }, { "epoch": 0.26808429846212267, "grad_norm": 1.5103752753904787, "learning_rate": 1.346488455751882e-06, "log_odds_chosen": 1.2790038585662842, "log_odds_ratio": -0.442626953125, "logits/chosen": -1.1437499523162842, "logits/rejected": -0.9476562738418579, "logps/chosen": -0.6917968988418579, "logps/rejected": -1.6201171875, "loss": 0.9665, "nll_loss": 0.892773449420929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06920166313648224, "rewards/margins": 0.09288940578699112, "rewards/rejected": -0.16208496689796448, "step": 3530 }, { "epoch": 0.2688437440668312, "grad_norm": 1.4716327415175032, "learning_rate": 1.3445852909056286e-06, "log_odds_chosen": 1.1433594226837158, "log_odds_ratio": -0.43842774629592896, "logits/chosen": -1.1388671398162842, "logits/rejected": -0.9677734375, "logps/chosen": -0.7064453363418579, "logps/rejected": -1.4865233898162842, "loss": 0.9645, "nll_loss": 1.0076172351837158, "rewards/accuracies": 0.75, "rewards/chosen": -0.07070312649011612, "rewards/margins": 0.07794799655675888, "rewards/rejected": -0.14873047173023224, "step": 3540 }, { "epoch": 0.2696031896715398, "grad_norm": 1.4546614320578075, "learning_rate": 1.3426901732747024e-06, "log_odds_chosen": 1.1063964366912842, "log_odds_ratio": -0.4630371034145355, "logits/chosen": -1.135156273841858, "logits/rejected": -0.98046875, "logps/chosen": -0.68701171875, "logps/rejected": -1.450781226158142, "loss": 0.964, "nll_loss": 0.9190429449081421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06867675483226776, "rewards/margins": 0.07645873725414276, "rewards/rejected": -0.14516600966453552, "step": 3550 }, { "epoch": 0.2703626352762483, "grad_norm": 1.3896417900692484, "learning_rate": 1.3408030463079818e-06, "log_odds_chosen": 1.177343726158142, "log_odds_ratio": -0.467529296875, "logits/chosen": -1.1023437976837158, "logits/rejected": -0.9439452886581421, "logps/chosen": -0.7476562261581421, "logps/rejected": -1.5998046398162842, "loss": 0.9693, "nll_loss": 0.892578125, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07470703125, "rewards/margins": 0.08530273288488388, "rewards/rejected": -0.15983887016773224, "step": 3560 }, { "epoch": 0.2711220808809569, "grad_norm": 2.2914992812687567, "learning_rate": 1.3389238540091568e-06, "log_odds_chosen": 1.229394555091858, "log_odds_ratio": -0.4256347715854645, "logits/chosen": -1.2000000476837158, "logits/rejected": -0.975781261920929, "logps/chosen": -0.7110351324081421, "logps/rejected": -1.5908203125, "loss": 1.0, "nll_loss": 0.9517577886581421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07109375298023224, "rewards/margins": 0.08792724460363388, "rewards/rejected": -0.15891113877296448, "step": 3570 }, { "epoch": 0.27188152648566544, "grad_norm": 1.909572523620127, "learning_rate": 1.337052540929751e-06, "log_odds_chosen": 1.277734398841858, "log_odds_ratio": -0.43769532442092896, "logits/chosen": -1.140039086341858, "logits/rejected": -0.9507812261581421, "logps/chosen": -0.7109375, "logps/rejected": -1.6203124523162842, "loss": 0.9884, "nll_loss": 0.934765636920929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07100830227136612, "rewards/margins": 0.09093017876148224, "rewards/rejected": -0.16201171278953552, "step": 3580 }, { "epoch": 0.272640972090374, "grad_norm": 1.5261932017459958, "learning_rate": 1.33518905216225e-06, "log_odds_chosen": 1.3503906726837158, "log_odds_ratio": -0.42265623807907104, "logits/chosen": -1.0927734375, "logits/rejected": -0.9326171875, "logps/chosen": -0.7533203363418579, "logps/rejected": -1.723046898841858, "loss": 0.983, "nll_loss": 0.9222656488418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07524414360523224, "rewards/margins": 0.09708251804113388, "rewards/rejected": -0.17231445014476776, "step": 3590 }, { "epoch": 0.2734004176950826, "grad_norm": 1.6017940702607685, "learning_rate": 1.3333333333333332e-06, "log_odds_chosen": 1.366418480873108, "log_odds_ratio": -0.41694337129592896, "logits/chosen": -1.095312476158142, "logits/rejected": -0.903515636920929, "logps/chosen": -0.7081054449081421, "logps/rejected": -1.6804687976837158, "loss": 0.9731, "nll_loss": 0.9593750238418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07086181640625, "rewards/margins": 0.09723129123449326, "rewards/rejected": -0.16801758110523224, "step": 3600 }, { "epoch": 0.27415986329979114, "grad_norm": 1.3719909364232317, "learning_rate": 1.3314853305972122e-06, "log_odds_chosen": 1.1667969226837158, "log_odds_ratio": -0.4420410096645355, "logits/chosen": -1.069726586341858, "logits/rejected": -0.958789050579071, "logps/chosen": -0.681347668170929, "logps/rejected": -1.5076172351837158, "loss": 0.9785, "nll_loss": 0.9203125238418579, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06812743842601776, "rewards/margins": 0.08260498195886612, "rewards/rejected": -0.15080566704273224, "step": 3610 }, { "epoch": 0.2749193089044997, "grad_norm": 1.6687052622327487, "learning_rate": 1.3296449906290671e-06, "log_odds_chosen": 1.3759765625, "log_odds_ratio": -0.3968261778354645, "logits/chosen": -1.1515624523162842, "logits/rejected": -0.968945324420929, "logps/chosen": -0.674609363079071, "logps/rejected": -1.634374976158142, "loss": 0.9662, "nll_loss": 0.953320324420929, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06741943210363388, "rewards/margins": 0.09603271633386612, "rewards/rejected": -0.16347655653953552, "step": 3620 }, { "epoch": 0.27567875450920826, "grad_norm": 1.420007666636513, "learning_rate": 1.3278122606185844e-06, "log_odds_chosen": 1.2668945789337158, "log_odds_ratio": -0.41132813692092896, "logits/chosen": -1.103906273841858, "logits/rejected": -0.9794921875, "logps/chosen": -0.716992199420929, "logps/rejected": -1.601953148841858, "loss": 0.9759, "nll_loss": 0.929882824420929, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07167968899011612, "rewards/margins": 0.08853759616613388, "rewards/rejected": -0.16030272841453552, "step": 3630 }, { "epoch": 0.27643820011391684, "grad_norm": 1.52531255522937, "learning_rate": 1.3259870882635918e-06, "log_odds_chosen": 1.058447241783142, "log_odds_ratio": -0.5074218511581421, "logits/chosen": -1.0242187976837158, "logits/rejected": -0.8841797113418579, "logps/chosen": -0.712695300579071, "logps/rejected": -1.4091796875, "loss": 0.9764, "nll_loss": 0.958789050579071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07124023139476776, "rewards/margins": 0.069671630859375, "rewards/rejected": -0.14094237983226776, "step": 3640 }, { "epoch": 0.27719764571862543, "grad_norm": 1.469836011677743, "learning_rate": 1.3241694217637886e-06, "log_odds_chosen": 0.991162121295929, "log_odds_ratio": -0.48955076932907104, "logits/chosen": -1.192968726158142, "logits/rejected": -0.9624999761581421, "logps/chosen": -0.7583984136581421, "logps/rejected": -1.439453125, "loss": 0.9904, "nll_loss": 0.9566406011581421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07579346001148224, "rewards/margins": 0.06814346462488174, "rewards/rejected": -0.14389649033546448, "step": 3650 }, { "epoch": 0.27795709132333396, "grad_norm": 1.5054505983589397, "learning_rate": 1.3223592098145723e-06, "log_odds_chosen": 1.1679198741912842, "log_odds_ratio": -0.43012696504592896, "logits/chosen": -1.0597655773162842, "logits/rejected": -0.872851550579071, "logps/chosen": -0.699414074420929, "logps/rejected": -1.529687523841858, "loss": 0.996, "nll_loss": 0.94140625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06990966945886612, "rewards/margins": 0.08306274563074112, "rewards/rejected": -0.15278320014476776, "step": 3660 }, { "epoch": 0.27871653692804255, "grad_norm": 1.4434670236611713, "learning_rate": 1.3205564016009555e-06, "log_odds_chosen": 1.04443359375, "log_odds_ratio": -0.4825195372104645, "logits/chosen": -1.099609375, "logits/rejected": -0.967968761920929, "logps/chosen": -0.7835937738418579, "logps/rejected": -1.5203125476837158, "loss": 0.9877, "nll_loss": 0.952343761920929, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07838134467601776, "rewards/margins": 0.07358398288488388, "rewards/rejected": -0.15202637016773224, "step": 3670 }, { "epoch": 0.2794759825327511, "grad_norm": 1.6151180255321824, "learning_rate": 1.318760946791574e-06, "log_odds_chosen": 0.855908215045929, "log_odds_ratio": -0.53125, "logits/chosen": -1.212304711341858, "logits/rejected": -1.058203101158142, "logps/chosen": -0.7250000238418579, "logps/rejected": -1.31640625, "loss": 0.9909, "nll_loss": 0.902539074420929, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07252196967601776, "rewards/margins": 0.05918274074792862, "rewards/rejected": -0.1317138671875, "step": 3680 }, { "epoch": 0.28023542813745966, "grad_norm": 1.2867627624056872, "learning_rate": 1.316972795532786e-06, "log_odds_chosen": 1.279394507408142, "log_odds_ratio": -0.4107910096645355, "logits/chosen": -1.1496093273162842, "logits/rejected": -1.0060546398162842, "logps/chosen": -0.66357421875, "logps/rejected": -1.5398437976837158, "loss": 0.9688, "nll_loss": 0.8671875, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06630859524011612, "rewards/margins": 0.08768920600414276, "rewards/rejected": -0.15395507216453552, "step": 3690 }, { "epoch": 0.2809948737421682, "grad_norm": 1.4643952424202256, "learning_rate": 1.3151918984428582e-06, "log_odds_chosen": 1.2014648914337158, "log_odds_ratio": -0.5008789300918579, "logits/chosen": -1.0994141101837158, "logits/rejected": -0.98828125, "logps/chosen": -0.759570300579071, "logps/rejected": -1.635156273841858, "loss": 0.9876, "nll_loss": 1.012304663658142, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07597656548023224, "rewards/margins": 0.08767089992761612, "rewards/rejected": -0.16350097954273224, "step": 3700 }, { "epoch": 0.2817543193468768, "grad_norm": 1.610843210581124, "learning_rate": 1.313418206606237e-06, "log_odds_chosen": 1.1012084484100342, "log_odds_ratio": -0.529492199420929, "logits/chosen": -1.122656226158142, "logits/rejected": -0.991406261920929, "logps/chosen": -0.775683581829071, "logps/rejected": -1.5753905773162842, "loss": 0.9783, "nll_loss": 0.933789074420929, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.0775146484375, "rewards/margins": 0.07987670600414276, "rewards/rejected": -0.15754394233226776, "step": 3710 }, { "epoch": 0.28251376495158537, "grad_norm": 1.491608931236001, "learning_rate": 1.3116516715679057e-06, "log_odds_chosen": 0.9146484136581421, "log_odds_ratio": -0.5130859613418579, "logits/chosen": -1.2087891101837158, "logits/rejected": -1.0126953125, "logps/chosen": -0.7416015863418579, "logps/rejected": -1.380859375, "loss": 0.9738, "nll_loss": 0.8412109613418579, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07414551079273224, "rewards/margins": 0.06396484375, "rewards/rejected": -0.13808593153953552, "step": 3720 }, { "epoch": 0.2832732105562939, "grad_norm": 1.4742608382143596, "learning_rate": 1.3098922453278258e-06, "log_odds_chosen": 1.3552734851837158, "log_odds_ratio": -0.4028076231479645, "logits/chosen": -1.1943359375, "logits/rejected": -1.0167968273162842, "logps/chosen": -0.6693359613418579, "logps/rejected": -1.609765648841858, "loss": 0.9623, "nll_loss": 0.958203136920929, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06689453125, "rewards/margins": 0.09395141899585724, "rewards/rejected": -0.16096191108226776, "step": 3730 }, { "epoch": 0.2840326561610025, "grad_norm": 1.6751798924042576, "learning_rate": 1.3081398803354573e-06, "log_odds_chosen": 1.0268676280975342, "log_odds_ratio": -0.520458996295929, "logits/chosen": -1.114648461341858, "logits/rejected": -0.988085925579071, "logps/chosen": -0.810253918170929, "logps/rejected": -1.53125, "loss": 0.9923, "nll_loss": 0.991015613079071, "rewards/accuracies": 0.71875, "rewards/chosen": -0.0810546875, "rewards/margins": 0.07220001518726349, "rewards/rejected": -0.15322265028953552, "step": 3740 }, { "epoch": 0.284792101765711, "grad_norm": 1.6895740813138298, "learning_rate": 1.3063945294843617e-06, "log_odds_chosen": 1.1066162586212158, "log_odds_ratio": -0.48918455839157104, "logits/chosen": -1.158593773841858, "logits/rejected": -0.9791015386581421, "logps/chosen": -0.7066406011581421, "logps/rejected": -1.494726538658142, "loss": 0.9958, "nll_loss": 0.9141601324081421, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07070312649011612, "rewards/margins": 0.07871093600988388, "rewards/rejected": -0.14931640028953552, "step": 3750 }, { "epoch": 0.2855515473704196, "grad_norm": 1.3551310516093933, "learning_rate": 1.3046561461068843e-06, "log_odds_chosen": 1.1027343273162842, "log_odds_ratio": -0.4644531309604645, "logits/chosen": -1.1435546875, "logits/rejected": -0.939648449420929, "logps/chosen": -0.6668945550918579, "logps/rejected": -1.425390601158142, "loss": 0.9851, "nll_loss": 0.9365234375, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.06673584133386612, "rewards/margins": 0.07593383640050888, "rewards/rejected": -0.14243164658546448, "step": 3760 }, { "epoch": 0.28631099297512813, "grad_norm": 1.2872733994595555, "learning_rate": 1.3029246839689124e-06, "log_odds_chosen": 0.9031982421875, "log_odds_ratio": -0.5287109613418579, "logits/chosen": -1.100976586341858, "logits/rejected": -0.996289074420929, "logps/chosen": -0.7162109613418579, "logps/rejected": -1.3240234851837158, "loss": 0.9854, "nll_loss": 0.9365234375, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07163085788488388, "rewards/margins": 0.06087341159582138, "rewards/rejected": -0.13232421875, "step": 3770 }, { "epoch": 0.2870704385798367, "grad_norm": 1.8875003663563275, "learning_rate": 1.3012000972647109e-06, "log_odds_chosen": 1.1169922351837158, "log_odds_ratio": -0.456298828125, "logits/chosen": -1.1593749523162842, "logits/rejected": -0.9908202886581421, "logps/chosen": -0.7139648199081421, "logps/rejected": -1.465234398841858, "loss": 0.9706, "nll_loss": 0.955859363079071, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07143554836511612, "rewards/margins": 0.07517089694738388, "rewards/rejected": -0.1466064453125, "step": 3780 }, { "epoch": 0.2878298841845453, "grad_norm": 1.3344170091057086, "learning_rate": 1.299482340611832e-06, "log_odds_chosen": 1.454492211341858, "log_odds_ratio": -0.36298829317092896, "logits/chosen": -1.1513671875, "logits/rejected": -0.954296886920929, "logps/chosen": -0.646484375, "logps/rejected": -1.6535155773162842, "loss": 0.9793, "nll_loss": 0.941113293170929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06462402641773224, "rewards/margins": 0.10064697265625, "rewards/rejected": -0.165283203125, "step": 3790 }, { "epoch": 0.28858932978925383, "grad_norm": 1.3437948556215271, "learning_rate": 1.2977713690461003e-06, "log_odds_chosen": 1.4052734375, "log_odds_ratio": -0.43461912870407104, "logits/chosen": -1.1083984375, "logits/rejected": -0.927734375, "logps/chosen": -0.7997070550918579, "logps/rejected": -1.843359351158142, "loss": 0.97, "nll_loss": 0.9486328363418579, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.07999267429113388, "rewards/margins": 0.10430908203125, "rewards/rejected": -0.1842041015625, "step": 3800 }, { "epoch": 0.2893487753939624, "grad_norm": 1.892029134340577, "learning_rate": 1.296067138016669e-06, "log_odds_chosen": 1.292089819908142, "log_odds_ratio": -0.4212402403354645, "logits/chosen": -1.0896484851837158, "logits/rejected": -0.9273437261581421, "logps/chosen": -0.7484375238418579, "logps/rejected": -1.674414038658142, "loss": 0.9913, "nll_loss": 0.925585925579071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07476806640625, "rewards/margins": 0.0927734375, "rewards/rejected": -0.16748046875, "step": 3810 }, { "epoch": 0.29010822099867095, "grad_norm": 1.5875108652574412, "learning_rate": 1.294369603381147e-06, "log_odds_chosen": 1.381738305091858, "log_odds_ratio": -0.43413084745407104, "logits/chosen": -1.1857421398162842, "logits/rejected": -0.9976562261581421, "logps/chosen": -0.7616211175918579, "logps/rejected": -1.7609374523162842, "loss": 0.9788, "nll_loss": 0.941601574420929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07620849460363388, "rewards/margins": 0.09990234673023224, "rewards/rejected": -0.17617186903953552, "step": 3820 }, { "epoch": 0.29086766660337954, "grad_norm": 1.4112950079997628, "learning_rate": 1.2926787214007981e-06, "log_odds_chosen": 1.0858643054962158, "log_odds_ratio": -0.4942871034145355, "logits/chosen": -1.149023413658142, "logits/rejected": -0.9697265625, "logps/chosen": -0.7701171636581421, "logps/rejected": -1.544531226158142, "loss": 1.0066, "nll_loss": 0.982617199420929, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07701416313648224, "rewards/margins": 0.07748107612133026, "rewards/rejected": -0.15456542372703552, "step": 3830 }, { "epoch": 0.29162711220808807, "grad_norm": 1.317863845747207, "learning_rate": 1.2909944487358056e-06, "log_odds_chosen": 1.252343773841858, "log_odds_ratio": -0.41552734375, "logits/chosen": -1.1183593273162842, "logits/rejected": -0.9658203125, "logps/chosen": -0.680859386920929, "logps/rejected": -1.5382812023162842, "loss": 0.9727, "nll_loss": 0.899218738079071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06804199516773224, "rewards/margins": 0.08576659858226776, "rewards/rejected": -0.15380859375, "step": 3840 }, { "epoch": 0.29238655781279665, "grad_norm": 1.6513138551346938, "learning_rate": 1.2893167424406084e-06, "log_odds_chosen": 1.199804663658142, "log_odds_ratio": -0.40961915254592896, "logits/chosen": -1.117773413658142, "logits/rejected": -0.925000011920929, "logps/chosen": -0.708789050579071, "logps/rejected": -1.500390648841858, "loss": 0.9702, "nll_loss": 0.9222656488418579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07087402045726776, "rewards/margins": 0.0790863037109375, "rewards/rejected": -0.15004882216453552, "step": 3850 }, { "epoch": 0.29314600341750524, "grad_norm": 1.6015614298818726, "learning_rate": 1.2876455599593008e-06, "log_odds_chosen": 1.0802733898162842, "log_odds_ratio": -0.45869141817092896, "logits/chosen": -1.152929663658142, "logits/rejected": -0.9755859375, "logps/chosen": -0.739941418170929, "logps/rejected": -1.462499976158142, "loss": 0.9653, "nll_loss": 0.946484386920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07393798977136612, "rewards/margins": 0.07231750339269638, "rewards/rejected": -0.14631347358226776, "step": 3860 }, { "epoch": 0.29390544902221377, "grad_norm": 1.8492890264459703, "learning_rate": 1.285980859121099e-06, "log_odds_chosen": 1.240820288658142, "log_odds_ratio": -0.46806639432907104, "logits/chosen": -1.1472656726837158, "logits/rejected": -0.9388672113418579, "logps/chosen": -0.762499988079071, "logps/rejected": -1.6853516101837158, "loss": 0.9778, "nll_loss": 0.9468749761581421, "rewards/accuracies": 0.75, "rewards/chosen": -0.07631836086511612, "rewards/margins": 0.09233398735523224, "rewards/rejected": -0.16867676377296448, "step": 3870 }, { "epoch": 0.29466489462692236, "grad_norm": 2.375096848280451, "learning_rate": 1.2843225981358712e-06, "log_odds_chosen": 1.206640601158142, "log_odds_ratio": -0.44926756620407104, "logits/chosen": -1.072851538658142, "logits/rejected": -0.937695324420929, "logps/chosen": -0.737500011920929, "logps/rejected": -1.5859375, "loss": 0.9905, "nll_loss": 0.98046875, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07379150390625, "rewards/margins": 0.08471069484949112, "rewards/rejected": -0.1585693359375, "step": 3880 }, { "epoch": 0.2954243402316309, "grad_norm": 1.5532642091297204, "learning_rate": 1.2826707355897317e-06, "log_odds_chosen": 1.2840087413787842, "log_odds_ratio": -0.4241699278354645, "logits/chosen": -1.1062500476837158, "logits/rejected": -0.9189453125, "logps/chosen": -0.745312511920929, "logps/rejected": -1.6867187023162842, "loss": 1.0058, "nll_loss": 1.011132836341858, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07456054538488388, "rewards/margins": 0.09412231296300888, "rewards/rejected": -0.16853027045726776, "step": 3890 }, { "epoch": 0.2961837858363395, "grad_norm": 1.3734683852775462, "learning_rate": 1.281025230440697e-06, "log_odds_chosen": 1.103979468345642, "log_odds_ratio": -0.4815917909145355, "logits/chosen": -1.114843726158142, "logits/rejected": -0.943164050579071, "logps/chosen": -0.7074218988418579, "logps/rejected": -1.490820288658142, "loss": 0.9837, "nll_loss": 0.9378906488418579, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0706787109375, "rewards/margins": 0.07838134467601776, "rewards/rejected": -0.14909668266773224, "step": 3900 }, { "epoch": 0.29694323144104806, "grad_norm": 1.546160137488261, "learning_rate": 1.2793860420144025e-06, "log_odds_chosen": 1.140966773033142, "log_odds_ratio": -0.4849609434604645, "logits/chosen": -1.140625, "logits/rejected": -0.994921863079071, "logps/chosen": -0.7528320550918579, "logps/rejected": -1.5359375476837158, "loss": 0.9954, "nll_loss": 1.0087890625, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07524414360523224, "rewards/margins": 0.07821960747241974, "rewards/rejected": -0.15357665717601776, "step": 3910 }, { "epoch": 0.2977026770457566, "grad_norm": 1.4830176861795343, "learning_rate": 1.2777531299998798e-06, "log_odds_chosen": 1.134008765220642, "log_odds_ratio": -0.4917968809604645, "logits/chosen": -1.160742163658142, "logits/rejected": -0.9652343988418579, "logps/chosen": -0.7255859375, "logps/rejected": -1.552148461341858, "loss": 1.0074, "nll_loss": 0.9248046875, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.07254638522863388, "rewards/margins": 0.082611083984375, "rewards/rejected": -0.15520019829273224, "step": 3920 }, { "epoch": 0.2984621226504652, "grad_norm": 1.7681650453970392, "learning_rate": 1.2761264544453928e-06, "log_odds_chosen": 1.193603515625, "log_odds_ratio": -0.4505859315395355, "logits/chosen": -1.18359375, "logits/rejected": -0.943164050579071, "logps/chosen": -0.7191406488418579, "logps/rejected": -1.5517578125, "loss": 0.9499, "nll_loss": 0.8714843988418579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07192382961511612, "rewards/margins": 0.08324585109949112, "rewards/rejected": -0.15524902939796448, "step": 3930 }, { "epoch": 0.2992215682551737, "grad_norm": 1.7080216851926062, "learning_rate": 1.2745059757543324e-06, "log_odds_chosen": 1.2736327648162842, "log_odds_ratio": -0.4266113340854645, "logits/chosen": -1.1033203601837158, "logits/rejected": -0.940234363079071, "logps/chosen": -0.698925793170929, "logps/rejected": -1.6044921875, "loss": 0.9707, "nll_loss": 0.8983398675918579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06984863430261612, "rewards/margins": 0.09050293266773224, "rewards/rejected": -0.1605224609375, "step": 3940 }, { "epoch": 0.2999810138598823, "grad_norm": 22.384341182922466, "learning_rate": 1.272891654681168e-06, "log_odds_chosen": 0.9751220941543579, "log_odds_ratio": -0.5111328363418579, "logits/chosen": -1.1279296875, "logits/rejected": -0.9720703363418579, "logps/chosen": -0.7431640625, "logps/rejected": -1.440820336341858, "loss": 0.9788, "nll_loss": 0.951953113079071, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07437743991613388, "rewards/margins": 0.06978454440832138, "rewards/rejected": -0.14399413764476776, "step": 3950 }, { "epoch": 0.3007404594645908, "grad_norm": 1.5013129520750492, "learning_rate": 1.2712834523274563e-06, "log_odds_chosen": 1.205810546875, "log_odds_ratio": -0.45869141817092896, "logits/chosen": -1.1589844226837158, "logits/rejected": -0.934765636920929, "logps/chosen": -0.7353515625, "logps/rejected": -1.5773437023162842, "loss": 0.9666, "nll_loss": 0.870898425579071, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07351074367761612, "rewards/margins": 0.08422698825597763, "rewards/rejected": -0.1578369140625, "step": 3960 }, { "epoch": 0.3014999050692994, "grad_norm": 1.630203721579801, "learning_rate": 1.2696813301379032e-06, "log_odds_chosen": 1.10009765625, "log_odds_ratio": -0.478515625, "logits/chosen": -1.2326171398162842, "logits/rejected": -1.0496094226837158, "logps/chosen": -0.749804675579071, "logps/rejected": -1.5226562023162842, "loss": 0.9685, "nll_loss": 0.9652343988418579, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07502441108226776, "rewards/margins": 0.07734985649585724, "rewards/rejected": -0.15229491889476776, "step": 3970 }, { "epoch": 0.302259350674008, "grad_norm": 1.8299550655654082, "learning_rate": 1.2680852498964829e-06, "log_odds_chosen": 1.3813965320587158, "log_odds_ratio": -0.4297851622104645, "logits/chosen": -1.139062523841858, "logits/rejected": -0.985156238079071, "logps/chosen": -0.711621105670929, "logps/rejected": -1.697265625, "loss": 0.9579, "nll_loss": 0.8421875238418579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07114257663488388, "rewards/margins": 0.09866638481616974, "rewards/rejected": -0.16982421278953552, "step": 3980 }, { "epoch": 0.30301879627871653, "grad_norm": 1.4396051476480813, "learning_rate": 1.266495173722607e-06, "log_odds_chosen": 1.414160132408142, "log_odds_ratio": -0.411865234375, "logits/chosen": -1.2429687976837158, "logits/rejected": -1.034765601158142, "logps/chosen": -0.6973632574081421, "logps/rejected": -1.67578125, "loss": 0.9329, "nll_loss": 0.840039074420929, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06973876804113388, "rewards/margins": 0.09785766899585724, "rewards/rejected": -0.16752929985523224, "step": 3990 }, { "epoch": 0.3037782418834251, "grad_norm": 1.3100451158234845, "learning_rate": 1.2649110640673517e-06, "log_odds_chosen": 1.253515601158142, "log_odds_ratio": -0.4696289002895355, "logits/chosen": -1.171289086341858, "logits/rejected": -0.982617199420929, "logps/chosen": -0.778515636920929, "logps/rejected": -1.707421898841858, "loss": 0.9845, "nll_loss": 0.9818359613418579, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07784423977136612, "rewards/margins": 0.09282226860523224, "rewards/rejected": -0.170654296875, "step": 4000 }, { "epoch": 0.30453768748813365, "grad_norm": 1.3772531828622947, "learning_rate": 1.2633328837097308e-06, "log_odds_chosen": 1.328393578529358, "log_odds_ratio": -0.41352540254592896, "logits/chosen": -1.162695288658142, "logits/rejected": -0.9126952886581421, "logps/chosen": -0.723828136920929, "logps/rejected": -1.694726586341858, "loss": 0.9834, "nll_loss": 0.91796875, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07236327975988388, "rewards/margins": 0.09714965522289276, "rewards/rejected": -0.16958007216453552, "step": 4010 }, { "epoch": 0.30529713309284223, "grad_norm": 1.4118818873015981, "learning_rate": 1.2617605957530233e-06, "log_odds_chosen": 1.275781273841858, "log_odds_ratio": -0.398193359375, "logits/chosen": -1.1433594226837158, "logits/rejected": -0.9437500238418579, "logps/chosen": -0.703320324420929, "logps/rejected": -1.579492211341858, "loss": 0.9683, "nll_loss": 0.8833984136581421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07032470405101776, "rewards/margins": 0.08776245266199112, "rewards/rejected": -0.15812988579273224, "step": 4020 }, { "epoch": 0.30605657869755076, "grad_norm": 1.654201123666687, "learning_rate": 1.2601941636211516e-06, "log_odds_chosen": 1.298242211341858, "log_odds_ratio": -0.40751951932907104, "logits/chosen": -1.234375, "logits/rejected": -1.0080077648162842, "logps/chosen": -0.696093738079071, "logps/rejected": -1.587499976158142, "loss": 0.9732, "nll_loss": 0.954882800579071, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06961669772863388, "rewards/margins": 0.08914794772863388, "rewards/rejected": -0.1588134765625, "step": 4030 }, { "epoch": 0.30681602430225935, "grad_norm": 1.493792240083812, "learning_rate": 1.2586335510551052e-06, "log_odds_chosen": 1.412500023841858, "log_odds_ratio": -0.4026855528354645, "logits/chosen": -1.138671875, "logits/rejected": -0.943554699420929, "logps/chosen": -0.7494140863418579, "logps/rejected": -1.791406273841858, "loss": 0.9404, "nll_loss": 0.887499988079071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07496337592601776, "rewards/margins": 0.10427246242761612, "rewards/rejected": -0.17910155653953552, "step": 4040 }, { "epoch": 0.30757546990696794, "grad_norm": 1.3632358891705942, "learning_rate": 1.2570787221094177e-06, "log_odds_chosen": 1.328125, "log_odds_ratio": -0.4405273497104645, "logits/chosen": -1.1337890625, "logits/rejected": -0.906445324420929, "logps/chosen": -0.753710925579071, "logps/rejected": -1.7302734851837158, "loss": 0.9734, "nll_loss": 0.992382824420929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.075439453125, "rewards/margins": 0.09759521484375, "rewards/rejected": -0.17299804091453552, "step": 4050 }, { "epoch": 0.30833491551167647, "grad_norm": 1.459250991459911, "learning_rate": 1.255529641148689e-06, "log_odds_chosen": 1.1911132335662842, "log_odds_ratio": -0.44648438692092896, "logits/chosen": -1.081640601158142, "logits/rejected": -0.9517577886581421, "logps/chosen": -0.7095702886581421, "logps/rejected": -1.5330078601837158, "loss": 0.9724, "nll_loss": 0.887890636920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07099609076976776, "rewards/margins": 0.08241119235754013, "rewards/rejected": -0.15341797471046448, "step": 4060 }, { "epoch": 0.30909436111638505, "grad_norm": 1.5768719832072284, "learning_rate": 1.2539862728441536e-06, "log_odds_chosen": 1.063256859779358, "log_odds_ratio": -0.49101561307907104, "logits/chosen": -1.1251952648162842, "logits/rejected": -0.970898449420929, "logps/chosen": -0.7337890863418579, "logps/rejected": -1.478515625, "loss": 1.0064, "nll_loss": 0.950976550579071, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07333984225988388, "rewards/margins": 0.07452698051929474, "rewards/rejected": -0.1478271484375, "step": 4070 }, { "epoch": 0.3098538067210936, "grad_norm": 1.4063104366094328, "learning_rate": 1.252448582170299e-06, "log_odds_chosen": 1.578710913658142, "log_odds_ratio": -0.3636474609375, "logits/chosen": -1.1298828125, "logits/rejected": -0.9833984375, "logps/chosen": -0.7056640386581421, "logps/rejected": -1.855078101158142, "loss": 0.9838, "nll_loss": 0.838085949420929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07053222507238388, "rewards/margins": 0.11501464992761612, "rewards/rejected": -0.18557128310203552, "step": 4080 }, { "epoch": 0.31061325232580217, "grad_norm": 1.638751602038585, "learning_rate": 1.2509165344015243e-06, "log_odds_chosen": 1.4951171875, "log_odds_ratio": -0.41730958223342896, "logits/chosen": -1.1218750476837158, "logits/rejected": -0.9798828363418579, "logps/chosen": -0.7759765386581421, "logps/rejected": -1.891210913658142, "loss": 0.9755, "nll_loss": 1.0133788585662842, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07760009914636612, "rewards/margins": 0.11157684028148651, "rewards/rejected": -0.18925781548023224, "step": 4090 }, { "epoch": 0.3113726979305107, "grad_norm": 3.196446289247504, "learning_rate": 1.2493900951088486e-06, "log_odds_chosen": 1.245263695716858, "log_odds_ratio": -0.4215331971645355, "logits/chosen": -1.187109351158142, "logits/rejected": -0.9859374761581421, "logps/chosen": -0.7383788824081421, "logps/rejected": -1.6052734851837158, "loss": 0.9948, "nll_loss": 0.948046863079071, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07386474311351776, "rewards/margins": 0.08659668266773224, "rewards/rejected": -0.16057129204273224, "step": 4100 }, { "epoch": 0.3121321435352193, "grad_norm": 1.8572096996323164, "learning_rate": 1.2478692301566601e-06, "log_odds_chosen": 1.119873046875, "log_odds_ratio": -0.4635253846645355, "logits/chosen": -1.108007788658142, "logits/rejected": -0.9662109613418579, "logps/chosen": -0.6749023199081421, "logps/rejected": -1.4230468273162842, "loss": 0.9745, "nll_loss": 0.8949218988418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06744384765625, "rewards/margins": 0.07480163872241974, "rewards/rejected": -0.142333984375, "step": 4110 }, { "epoch": 0.3128915891399279, "grad_norm": 1.8025068962378696, "learning_rate": 1.2463539056995116e-06, "log_odds_chosen": 1.461816430091858, "log_odds_ratio": -0.4200195372104645, "logits/chosen": -1.075585961341858, "logits/rejected": -0.963085949420929, "logps/chosen": -0.7041015625, "logps/rejected": -1.7332031726837158, "loss": 0.9563, "nll_loss": 0.9032226800918579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07037353515625, "rewards/margins": 0.10288085788488388, "rewards/rejected": -0.17336425185203552, "step": 4120 }, { "epoch": 0.3136510347446364, "grad_norm": 1.4734617839376782, "learning_rate": 1.2448440881789541e-06, "log_odds_chosen": 1.331152319908142, "log_odds_ratio": -0.4471679627895355, "logits/chosen": -1.1892578601837158, "logits/rejected": -1.060156226158142, "logps/chosen": -0.6630859375, "logps/rejected": -1.6232421398162842, "loss": 0.9408, "nll_loss": 0.8681640625, "rewards/accuracies": 0.75, "rewards/chosen": -0.06629638373851776, "rewards/margins": 0.09604492038488388, "rewards/rejected": -0.16225585341453552, "step": 4130 }, { "epoch": 0.314410480349345, "grad_norm": 1.8707674641553664, "learning_rate": 1.2433397443204184e-06, "log_odds_chosen": 1.437353491783142, "log_odds_ratio": -0.3882812559604645, "logits/chosen": -1.154296875, "logits/rejected": -1.000390648841858, "logps/chosen": -0.6572265625, "logps/rejected": -1.6531250476837158, "loss": 0.985, "nll_loss": 0.900585949420929, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06566162407398224, "rewards/margins": 0.09978027641773224, "rewards/rejected": -0.16538086533546448, "step": 4140 }, { "epoch": 0.3151699259540535, "grad_norm": 2.2033811593308674, "learning_rate": 1.2418408411301324e-06, "log_odds_chosen": 1.4558594226837158, "log_odds_ratio": -0.39287108182907104, "logits/chosen": -1.181054711341858, "logits/rejected": -1.0134766101837158, "logps/chosen": -0.6640625, "logps/rejected": -1.664453148841858, "loss": 0.9614, "nll_loss": 0.838085949420929, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06636963039636612, "rewards/margins": 0.09986571967601776, "rewards/rejected": -0.1663818359375, "step": 4150 }, { "epoch": 0.3159293715587621, "grad_norm": 1.6289275654331348, "learning_rate": 1.2403473458920844e-06, "log_odds_chosen": 1.189062476158142, "log_odds_ratio": -0.463623046875, "logits/chosen": -1.13671875, "logits/rejected": -0.97265625, "logps/chosen": -0.730664074420929, "logps/rejected": -1.600195288658142, "loss": 0.9636, "nll_loss": 0.9537109136581421, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07297363132238388, "rewards/margins": 0.08691253513097763, "rewards/rejected": -0.15998534858226776, "step": 4160 }, { "epoch": 0.3166888171634707, "grad_norm": 1.5124973792586722, "learning_rate": 1.2388592261650217e-06, "log_odds_chosen": 1.459570288658142, "log_odds_ratio": -0.38383787870407104, "logits/chosen": -1.156835913658142, "logits/rejected": -0.970507800579071, "logps/chosen": -0.681347668170929, "logps/rejected": -1.704687476158142, "loss": 0.9505, "nll_loss": 0.904296875, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06810303032398224, "rewards/margins": 0.1024169921875, "rewards/rejected": -0.17041015625, "step": 4170 }, { "epoch": 0.3174482627681792, "grad_norm": 1.3020068228455774, "learning_rate": 1.2373764497794918e-06, "log_odds_chosen": 1.2825195789337158, "log_odds_ratio": -0.473876953125, "logits/chosen": -1.1298828125, "logits/rejected": -0.9677734375, "logps/chosen": -0.736523449420929, "logps/rejected": -1.6833984851837158, "loss": 0.9514, "nll_loss": 0.8853515386581421, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07365722954273224, "rewards/margins": 0.09463043510913849, "rewards/rejected": -0.16826172173023224, "step": 4180 }, { "epoch": 0.3182077083728878, "grad_norm": 1.486407334595894, "learning_rate": 1.2358989848349217e-06, "log_odds_chosen": 1.3203125, "log_odds_ratio": -0.4326171875, "logits/chosen": -1.211523413658142, "logits/rejected": -1.013671875, "logps/chosen": -0.76171875, "logps/rejected": -1.689843773841858, "loss": 0.9652, "nll_loss": 0.9341796636581421, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07619629055261612, "rewards/margins": 0.0928802490234375, "rewards/rejected": -0.16904297471046448, "step": 4190 }, { "epoch": 0.31896715397759634, "grad_norm": 1.2738348312941254, "learning_rate": 1.2344267996967353e-06, "log_odds_chosen": 1.365332007408142, "log_odds_ratio": -0.4248046875, "logits/chosen": -1.0419921875, "logits/rejected": -0.9535156488418579, "logps/chosen": -0.70751953125, "logps/rejected": -1.6740233898162842, "loss": 0.942, "nll_loss": 0.8773437738418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07075195014476776, "rewards/margins": 0.09653625637292862, "rewards/rejected": -0.16730956733226776, "step": 4200 }, { "epoch": 0.3197265995823049, "grad_norm": 1.5338409457215532, "learning_rate": 1.2329598629935076e-06, "log_odds_chosen": 1.311254858970642, "log_odds_ratio": -0.4112304747104645, "logits/chosen": -1.1943359375, "logits/rejected": -1.0076172351837158, "logps/chosen": -0.7261718511581421, "logps/rejected": -1.689062476158142, "loss": 0.9786, "nll_loss": 0.9556640386581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07258300483226776, "rewards/margins": 0.09637603908777237, "rewards/rejected": -0.1689453125, "step": 4210 }, { "epoch": 0.32048604518701346, "grad_norm": 1.5258899208723855, "learning_rate": 1.2314981436141583e-06, "log_odds_chosen": 1.3805663585662842, "log_odds_ratio": -0.4117675721645355, "logits/chosen": -1.119531273841858, "logits/rejected": -0.962109386920929, "logps/chosen": -0.6978515386581421, "logps/rejected": -1.649999976158142, "loss": 1.0108, "nll_loss": 0.943164050579071, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06972656399011612, "rewards/margins": 0.09520874172449112, "rewards/rejected": -0.1649169921875, "step": 4220 }, { "epoch": 0.32124549079172204, "grad_norm": 1.717168877620292, "learning_rate": 1.2300416107051802e-06, "log_odds_chosen": 1.0763428211212158, "log_odds_ratio": -0.4972167909145355, "logits/chosen": -1.0603516101837158, "logits/rejected": -0.9085937738418579, "logps/chosen": -0.7447265386581421, "logps/rejected": -1.5080077648162842, "loss": 0.9681, "nll_loss": 0.962695300579071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07448730617761612, "rewards/margins": 0.07644806057214737, "rewards/rejected": -0.15095214545726776, "step": 4230 }, { "epoch": 0.32200493639643063, "grad_norm": 1.44873353311678, "learning_rate": 1.2285902336679024e-06, "log_odds_chosen": 1.074365258216858, "log_odds_ratio": -0.505810558795929, "logits/chosen": -1.096093773841858, "logits/rejected": -0.927539050579071, "logps/chosen": -0.7494140863418579, "logps/rejected": -1.5359375476837158, "loss": 0.9695, "nll_loss": 0.9927734136581421, "rewards/accuracies": 0.75, "rewards/chosen": -0.07491455227136612, "rewards/margins": 0.07861938327550888, "rewards/rejected": -0.1534423828125, "step": 4240 }, { "epoch": 0.32276438200113916, "grad_norm": 1.4675221743760594, "learning_rate": 1.2271439821557926e-06, "log_odds_chosen": 1.301660180091858, "log_odds_ratio": -0.40068358182907104, "logits/chosen": -1.0714843273162842, "logits/rejected": -0.931835949420929, "logps/chosen": -0.658203125, "logps/rejected": -1.5675780773162842, "loss": 0.9885, "nll_loss": 0.8681640625, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06578369438648224, "rewards/margins": 0.09099731594324112, "rewards/rejected": -0.1568603515625, "step": 4250 }, { "epoch": 0.32352382760584775, "grad_norm": 1.879882604471872, "learning_rate": 1.225702826071791e-06, "log_odds_chosen": 1.3193359375, "log_odds_ratio": -0.3946777284145355, "logits/chosen": -1.180078148841858, "logits/rejected": -1.029882788658142, "logps/chosen": -0.6666015386581421, "logps/rejected": -1.5859375, "loss": 0.9639, "nll_loss": 0.8587890863418579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06667480617761612, "rewards/margins": 0.09202881157398224, "rewards/rejected": -0.15861816704273224, "step": 4260 }, { "epoch": 0.3242832732105563, "grad_norm": 1.4926416333348114, "learning_rate": 1.2242667355656797e-06, "log_odds_chosen": 1.128930687904358, "log_odds_ratio": -0.5132812261581421, "logits/chosen": -1.061132788658142, "logits/rejected": -0.9330078363418579, "logps/chosen": -0.788281261920929, "logps/rejected": -1.622656226158142, "loss": 0.9597, "nll_loss": 0.958984375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07884521782398224, "rewards/margins": 0.08331451565027237, "rewards/rejected": -0.16203613579273224, "step": 4270 }, { "epoch": 0.32504271881526486, "grad_norm": 1.6957435078213894, "learning_rate": 1.2228356810314862e-06, "log_odds_chosen": 1.2946288585662842, "log_odds_ratio": -0.40449219942092896, "logits/chosen": -1.201757788658142, "logits/rejected": -1.0, "logps/chosen": -0.668749988079071, "logps/rejected": -1.586328148841858, "loss": 0.9623, "nll_loss": 0.894726574420929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06684570014476776, "rewards/margins": 0.09185180813074112, "rewards/rejected": -0.1585693359375, "step": 4280 }, { "epoch": 0.3258021644199734, "grad_norm": 1.640142335104121, "learning_rate": 1.2214096331049186e-06, "log_odds_chosen": 1.141210913658142, "log_odds_ratio": -0.46220701932907104, "logits/chosen": -1.0695312023162842, "logits/rejected": -0.9662109613418579, "logps/chosen": -0.731249988079071, "logps/rejected": -1.5166015625, "loss": 0.9504, "nll_loss": 0.982617199420929, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07304687798023224, "rewards/margins": 0.07852783054113388, "rewards/rejected": -0.15156249701976776, "step": 4290 }, { "epoch": 0.326561610024682, "grad_norm": 1.596037224488681, "learning_rate": 1.2199885626608373e-06, "log_odds_chosen": 1.329492211341858, "log_odds_ratio": -0.3946289122104645, "logits/chosen": -1.17578125, "logits/rejected": -0.9740234613418579, "logps/chosen": -0.6744140386581421, "logps/rejected": -1.6023437976837158, "loss": 0.951, "nll_loss": 0.8609374761581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06745605170726776, "rewards/margins": 0.09288330376148224, "rewards/rejected": -0.16032715141773224, "step": 4300 }, { "epoch": 0.32732105562939057, "grad_norm": 1.7614781106502297, "learning_rate": 1.2185724408107546e-06, "log_odds_chosen": 1.1067993640899658, "log_odds_ratio": -0.512988269329071, "logits/chosen": -1.1044921875, "logits/rejected": -1.015234351158142, "logps/chosen": -0.730273425579071, "logps/rejected": -1.4880859851837158, "loss": 0.9575, "nll_loss": 0.873242199420929, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07301025092601776, "rewards/margins": 0.07570953667163849, "rewards/rejected": -0.148681640625, "step": 4310 }, { "epoch": 0.3280805012340991, "grad_norm": 1.3450090412789395, "learning_rate": 1.2171612389003689e-06, "log_odds_chosen": 1.480566382408142, "log_odds_ratio": -0.39055174589157104, "logits/chosen": -1.269140601158142, "logits/rejected": -1.0187499523162842, "logps/chosen": -0.7060546875, "logps/rejected": -1.796484351158142, "loss": 0.9432, "nll_loss": 0.854785144329071, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07053222507238388, "rewards/margins": 0.10895995795726776, "rewards/rejected": -0.17949219048023224, "step": 4320 }, { "epoch": 0.3288399468388077, "grad_norm": 1.5039995212737265, "learning_rate": 1.2157549285071297e-06, "log_odds_chosen": 1.0060546398162842, "log_odds_ratio": -0.5433593988418579, "logits/chosen": -1.1964843273162842, "logits/rejected": -1.0128905773162842, "logps/chosen": -0.773242175579071, "logps/rejected": -1.470117211341858, "loss": 0.9505, "nll_loss": 0.855175793170929, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07728271186351776, "rewards/margins": 0.0697021484375, "rewards/rejected": -0.14687499403953552, "step": 4330 }, { "epoch": 0.3295993924435162, "grad_norm": 1.5164681565753724, "learning_rate": 1.2143534814378327e-06, "log_odds_chosen": 1.516503930091858, "log_odds_ratio": -0.3619140684604645, "logits/chosen": -1.1925780773162842, "logits/rejected": -1.0076172351837158, "logps/chosen": -0.662109375, "logps/rejected": -1.6806640625, "loss": 0.9677, "nll_loss": 0.840527355670929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06619872897863388, "rewards/margins": 0.10183410346508026, "rewards/rejected": -0.16816405951976776, "step": 4340 }, { "epoch": 0.3303588380482248, "grad_norm": 1.3535955281211893, "learning_rate": 1.2129568697262454e-06, "log_odds_chosen": 1.1054198741912842, "log_odds_ratio": -0.489013671875, "logits/chosen": -1.095117211341858, "logits/rejected": -0.9072265625, "logps/chosen": -0.6888672113418579, "logps/rejected": -1.4140625, "loss": 0.9626, "nll_loss": 0.950390636920929, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.06889648735523224, "rewards/margins": 0.07251586765050888, "rewards/rejected": -0.141357421875, "step": 4350 }, { "epoch": 0.33111828365293333, "grad_norm": 1.662159564011864, "learning_rate": 1.2115650656307653e-06, "log_odds_chosen": 1.2506835460662842, "log_odds_ratio": -0.43706053495407104, "logits/chosen": -1.1658203601837158, "logits/rejected": -0.989062488079071, "logps/chosen": -0.741406261920929, "logps/rejected": -1.59765625, "loss": 0.961, "nll_loss": 0.8785156011581421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07418213039636612, "rewards/margins": 0.08564452826976776, "rewards/rejected": -0.15983887016773224, "step": 4360 }, { "epoch": 0.3318777292576419, "grad_norm": 2.028388247308431, "learning_rate": 1.210178041632103e-06, "log_odds_chosen": 1.2389647960662842, "log_odds_ratio": -0.4236816465854645, "logits/chosen": -1.1642577648162842, "logits/rejected": -0.9791015386581421, "logps/chosen": -0.677734375, "logps/rejected": -1.5271484851837158, "loss": 0.9647, "nll_loss": 0.823437511920929, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06773681938648224, "rewards/margins": 0.08510742336511612, "rewards/rejected": -0.15273436903953552, "step": 4370 }, { "epoch": 0.3326371748623505, "grad_norm": 1.9918761570987538, "learning_rate": 1.2087957704309988e-06, "log_odds_chosen": 1.437597632408142, "log_odds_ratio": -0.40227049589157104, "logits/chosen": -1.2236328125, "logits/rejected": -1.0691406726837158, "logps/chosen": -0.698437511920929, "logps/rejected": -1.6951172351837158, "loss": 0.943, "nll_loss": 0.892578125, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06981201469898224, "rewards/margins": 0.09970702975988388, "rewards/rejected": -0.16953125596046448, "step": 4380 }, { "epoch": 0.33339662046705903, "grad_norm": 1.591642812972933, "learning_rate": 1.2074182249459642e-06, "log_odds_chosen": 1.3076171875, "log_odds_ratio": -0.4637695252895355, "logits/chosen": -1.138085961341858, "logits/rejected": -0.941210925579071, "logps/chosen": -0.70458984375, "logps/rejected": -1.6015625, "loss": 0.952, "nll_loss": 0.962695300579071, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07044677436351776, "rewards/margins": 0.0897216796875, "rewards/rejected": -0.16005858778953552, "step": 4390 }, { "epoch": 0.3341560660717676, "grad_norm": 1.7794551361604825, "learning_rate": 1.2060453783110545e-06, "log_odds_chosen": 1.200231909751892, "log_odds_ratio": -0.4727539122104645, "logits/chosen": -1.138671875, "logits/rejected": -0.9613281488418579, "logps/chosen": -0.7499023675918579, "logps/rejected": -1.6189453601837158, "loss": 0.9845, "nll_loss": 0.896289050579071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07503662258386612, "rewards/margins": 0.08698578178882599, "rewards/rejected": -0.16201171278953552, "step": 4400 }, { "epoch": 0.33491551167647615, "grad_norm": 1.478759963515966, "learning_rate": 1.2046772038736682e-06, "log_odds_chosen": 1.3349609375, "log_odds_ratio": -0.3974609375, "logits/chosen": -1.0451171398162842, "logits/rejected": -0.925976574420929, "logps/chosen": -0.716796875, "logps/rejected": -1.658203125, "loss": 0.9651, "nll_loss": 0.985546886920929, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.0716552734375, "rewards/margins": 0.09410400688648224, "rewards/rejected": -0.16582031548023224, "step": 4410 }, { "epoch": 0.33567495728118474, "grad_norm": 2.699167733550373, "learning_rate": 1.2033136751923736e-06, "log_odds_chosen": 1.349707007408142, "log_odds_ratio": -0.42304688692092896, "logits/chosen": -1.141210913658142, "logits/rejected": -0.979296863079071, "logps/chosen": -0.690722644329071, "logps/rejected": -1.6388671398162842, "loss": 0.9776, "nll_loss": 0.9291015863418579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06903076171875, "rewards/margins": 0.09488067775964737, "rewards/rejected": -0.163818359375, "step": 4420 }, { "epoch": 0.3364344028858933, "grad_norm": 1.535292574893409, "learning_rate": 1.201954766034762e-06, "log_odds_chosen": 1.0596191883087158, "log_odds_ratio": -0.46894532442092896, "logits/chosen": -1.1671874523162842, "logits/rejected": -0.943554699420929, "logps/chosen": -0.717968761920929, "logps/rejected": -1.4814453125, "loss": 0.9583, "nll_loss": 0.93359375, "rewards/accuracies": 0.75, "rewards/chosen": -0.07183837890625, "rewards/margins": 0.076324462890625, "rewards/rejected": -0.14833983778953552, "step": 4430 }, { "epoch": 0.33719384849060186, "grad_norm": 1.5530894559688115, "learning_rate": 1.2006004503753285e-06, "log_odds_chosen": 1.1879394054412842, "log_odds_ratio": -0.4884277284145355, "logits/chosen": -1.098046898841858, "logits/rejected": -0.9853515625, "logps/chosen": -0.72412109375, "logps/rejected": -1.534765601158142, "loss": 0.9673, "nll_loss": 0.8721679449081421, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07237549126148224, "rewards/margins": 0.0810546875, "rewards/rejected": -0.15346679091453552, "step": 4440 }, { "epoch": 0.33795329409531044, "grad_norm": 1.414256223792457, "learning_rate": 1.1992507023933782e-06, "log_odds_chosen": 1.298242211341858, "log_odds_ratio": -0.408447265625, "logits/chosen": -1.110742211341858, "logits/rejected": -1.0087890625, "logps/chosen": -0.7255859375, "logps/rejected": -1.6162109375, "loss": 0.9496, "nll_loss": 0.9085937738418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07253418117761612, "rewards/margins": 0.08906249701976776, "rewards/rejected": -0.16166992485523224, "step": 4450 }, { "epoch": 0.33871273970001897, "grad_norm": 1.3638843812061403, "learning_rate": 1.1979054964709597e-06, "log_odds_chosen": 1.6160156726837158, "log_odds_ratio": -0.3416503965854645, "logits/chosen": -1.1896483898162842, "logits/rejected": -0.959179699420929, "logps/chosen": -0.6449218988418579, "logps/rejected": -1.7431640625, "loss": 0.9515, "nll_loss": 0.856249988079071, "rewards/accuracies": 0.875, "rewards/chosen": -0.06455077975988388, "rewards/margins": 0.10979614406824112, "rewards/rejected": -0.17451171576976776, "step": 4460 }, { "epoch": 0.33947218530472756, "grad_norm": 1.408375350102226, "learning_rate": 1.1965648071908207e-06, "log_odds_chosen": 1.227148413658142, "log_odds_ratio": -0.44501954317092896, "logits/chosen": -1.1083984375, "logits/rejected": -1.015039086341858, "logps/chosen": -0.7642577886581421, "logps/rejected": -1.646875023841858, "loss": 0.9428, "nll_loss": 0.936328113079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07644043117761612, "rewards/margins": 0.0882568359375, "rewards/rejected": -0.16459961235523224, "step": 4470 }, { "epoch": 0.3402316309094361, "grad_norm": 1.6683914528397188, "learning_rate": 1.1952286093343935e-06, "log_odds_chosen": 1.34521484375, "log_odds_ratio": -0.4500976502895355, "logits/chosen": -1.140625, "logits/rejected": -0.947265625, "logps/chosen": -0.670117199420929, "logps/rejected": -1.5791015625, "loss": 0.9806, "nll_loss": 0.856640636920929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06699218600988388, "rewards/margins": 0.09098510444164276, "rewards/rejected": -0.157958984375, "step": 4480 }, { "epoch": 0.3409910765141447, "grad_norm": 1.668067220025842, "learning_rate": 1.1938968778798005e-06, "log_odds_chosen": 1.0461914539337158, "log_odds_ratio": -0.5038086175918579, "logits/chosen": -1.0851562023162842, "logits/rejected": -0.941210925579071, "logps/chosen": -0.696093738079071, "logps/rejected": -1.4304687976837158, "loss": 0.9606, "nll_loss": 0.8882812261581421, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.06966552883386612, "rewards/margins": 0.07346191257238388, "rewards/rejected": -0.14313964545726776, "step": 4490 }, { "epoch": 0.34175052211885326, "grad_norm": 1.439897289989784, "learning_rate": 1.1925695879998878e-06, "log_odds_chosen": 1.329980492591858, "log_odds_ratio": -0.42236328125, "logits/chosen": -1.2126953601837158, "logits/rejected": -1.0009765625, "logps/chosen": -0.739453136920929, "logps/rejected": -1.683203101158142, "loss": 0.9588, "nll_loss": 0.966015636920929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07398681342601776, "rewards/margins": 0.09422607719898224, "rewards/rejected": -0.16818848252296448, "step": 4500 }, { "epoch": 0.3425099677235618, "grad_norm": 1.457321594290042, "learning_rate": 1.1912467150602794e-06, "log_odds_chosen": 1.315332055091858, "log_odds_ratio": -0.41997069120407104, "logits/chosen": -1.1335937976837158, "logits/rejected": -0.9505859613418579, "logps/chosen": -0.693164050579071, "logps/rejected": -1.6101562976837158, "loss": 0.9766, "nll_loss": 0.949023425579071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06927490234375, "rewards/margins": 0.09157714992761612, "rewards/rejected": -0.1610107421875, "step": 4510 }, { "epoch": 0.3432694133282704, "grad_norm": 1.4015846074015186, "learning_rate": 1.189928234617459e-06, "log_odds_chosen": 1.126953125, "log_odds_ratio": -0.4749999940395355, "logits/chosen": -1.1853516101837158, "logits/rejected": -0.982617199420929, "logps/chosen": -0.75390625, "logps/rejected": -1.533203125, "loss": 0.9651, "nll_loss": 0.927539050579071, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07541503757238388, "rewards/margins": 0.077880859375, "rewards/rejected": -0.15339355170726776, "step": 4520 }, { "epoch": 0.3440288589329789, "grad_norm": 1.701593914915817, "learning_rate": 1.1886141224168716e-06, "log_odds_chosen": 1.192626953125, "log_odds_ratio": -0.4251464903354645, "logits/chosen": -1.041406273841858, "logits/rejected": -0.907031238079071, "logps/chosen": -0.698046863079071, "logps/rejected": -1.5066406726837158, "loss": 0.9442, "nll_loss": 0.9019531011581421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06978759914636612, "rewards/margins": 0.08087310940027237, "rewards/rejected": -0.15078124403953552, "step": 4530 }, { "epoch": 0.3447883045376875, "grad_norm": 1.3218747798243968, "learning_rate": 1.1873043543910495e-06, "log_odds_chosen": 1.302209496498108, "log_odds_ratio": -0.38593751192092896, "logits/chosen": -1.0929687023162842, "logits/rejected": -0.9693359136581421, "logps/chosen": -0.6563476324081421, "logps/rejected": -1.5263671875, "loss": 0.9545, "nll_loss": 0.9251953363418579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06569824367761612, "rewards/margins": 0.08690185844898224, "rewards/rejected": -0.15249022841453552, "step": 4540 }, { "epoch": 0.345547750142396, "grad_norm": 1.5553797297238916, "learning_rate": 1.1859989066577617e-06, "log_odds_chosen": 1.340429663658142, "log_odds_ratio": -0.4327148497104645, "logits/chosen": -1.108789086341858, "logits/rejected": -0.936718761920929, "logps/chosen": -0.6771484613418579, "logps/rejected": -1.6281249523162842, "loss": 0.927, "nll_loss": 0.8841797113418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06773681938648224, "rewards/margins": 0.095184326171875, "rewards/rejected": -0.16289062798023224, "step": 4550 }, { "epoch": 0.3463071957471046, "grad_norm": 1.4426519603802264, "learning_rate": 1.1846977555181846e-06, "log_odds_chosen": 1.1991698741912842, "log_odds_ratio": -0.4817871153354645, "logits/chosen": -1.099023461341858, "logits/rejected": -1.009765625, "logps/chosen": -0.745312511920929, "logps/rejected": -1.593164086341858, "loss": 0.9516, "nll_loss": 0.953906238079071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07452392578125, "rewards/margins": 0.084747314453125, "rewards/rejected": -0.15932616591453552, "step": 4560 }, { "epoch": 0.3470666413518132, "grad_norm": 1.966185853977681, "learning_rate": 1.1834008774550946e-06, "log_odds_chosen": 1.123046875, "log_odds_ratio": -0.5296386480331421, "logits/chosen": -1.1863281726837158, "logits/rejected": -1.078710913658142, "logps/chosen": -0.740429699420929, "logps/rejected": -1.55859375, "loss": 0.9543, "nll_loss": 0.8812500238418579, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07403564453125, "rewards/margins": 0.08175353705883026, "rewards/rejected": -0.15585938096046448, "step": 4570 }, { "epoch": 0.34782608695652173, "grad_norm": 1.4426445980074782, "learning_rate": 1.1821082491310835e-06, "log_odds_chosen": 1.3473632335662842, "log_odds_ratio": -0.4590820372104645, "logits/chosen": -1.177734375, "logits/rejected": -1.0255858898162842, "logps/chosen": -0.6630859375, "logps/rejected": -1.596093773841858, "loss": 0.9682, "nll_loss": 0.940234363079071, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.0662841796875, "rewards/margins": 0.09342651069164276, "rewards/rejected": -0.15969237685203552, "step": 4580 }, { "epoch": 0.3485855325612303, "grad_norm": 1.5900866348290512, "learning_rate": 1.1808198473867937e-06, "log_odds_chosen": 1.279052734375, "log_odds_ratio": -0.43925780057907104, "logits/chosen": -1.0849609375, "logits/rejected": -0.9306640625, "logps/chosen": -0.7041991949081421, "logps/rejected": -1.6046874523162842, "loss": 0.9367, "nll_loss": 0.905078113079071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07042236626148224, "rewards/margins": 0.09006652981042862, "rewards/rejected": -0.16042479872703552, "step": 4590 }, { "epoch": 0.34934497816593885, "grad_norm": 1.6682160326268052, "learning_rate": 1.179535649239177e-06, "log_odds_chosen": 1.4221680164337158, "log_odds_ratio": -0.37275391817092896, "logits/chosen": -1.169921875, "logits/rejected": -0.977343738079071, "logps/chosen": -0.6656249761581421, "logps/rejected": -1.669921875, "loss": 0.9541, "nll_loss": 0.897265613079071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06656493991613388, "rewards/margins": 0.10040283203125, "rewards/rejected": -0.16701659560203552, "step": 4600 }, { "epoch": 0.35010442377064743, "grad_norm": 1.5103961307723972, "learning_rate": 1.178255631879771e-06, "log_odds_chosen": 1.4075195789337158, "log_odds_ratio": -0.42631834745407104, "logits/chosen": -1.256250023841858, "logits/rejected": -1.0544922351837158, "logps/chosen": -0.7359374761581421, "logps/rejected": -1.762109398841858, "loss": 0.941, "nll_loss": 0.864062488079071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0736083984375, "rewards/margins": 0.10262451320886612, "rewards/rejected": -0.17634277045726776, "step": 4610 }, { "epoch": 0.35086386937535596, "grad_norm": 1.6551577620770679, "learning_rate": 1.1769797726729992e-06, "log_odds_chosen": 1.1349608898162842, "log_odds_ratio": -0.48286134004592896, "logits/chosen": -1.2537109851837158, "logits/rejected": -1.0810546875, "logps/chosen": -0.704785168170929, "logps/rejected": -1.528906226158142, "loss": 0.9645, "nll_loss": 0.8558593988418579, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07047118991613388, "rewards/margins": 0.08240509033203125, "rewards/rejected": -0.15275879204273224, "step": 4620 }, { "epoch": 0.35162331498006455, "grad_norm": 1.6620549855853974, "learning_rate": 1.1757080491544881e-06, "log_odds_chosen": 1.217871069908142, "log_odds_ratio": -0.4336914122104645, "logits/chosen": -1.155664086341858, "logits/rejected": -0.9730468988418579, "logps/chosen": -0.717968761920929, "logps/rejected": -1.5447266101837158, "loss": 0.9765, "nll_loss": 0.939257800579071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07182617485523224, "rewards/margins": 0.08263244479894638, "rewards/rejected": -0.15446777641773224, "step": 4630 }, { "epoch": 0.35238276058477314, "grad_norm": 1.60092417583095, "learning_rate": 1.1744404390294068e-06, "log_odds_chosen": 1.088537573814392, "log_odds_ratio": -0.4754394590854645, "logits/chosen": -1.0505859851837158, "logits/rejected": -0.927734375, "logps/chosen": -0.709179699420929, "logps/rejected": -1.479882836341858, "loss": 0.963, "nll_loss": 0.932812511920929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.0709228515625, "rewards/margins": 0.07714233547449112, "rewards/rejected": -0.14792481064796448, "step": 4640 }, { "epoch": 0.35314220618948167, "grad_norm": 1.5228705957805935, "learning_rate": 1.1731769201708264e-06, "log_odds_chosen": 1.4505615234375, "log_odds_ratio": -0.375244140625, "logits/chosen": -1.1648437976837158, "logits/rejected": -0.9613281488418579, "logps/chosen": -0.7017577886581421, "logps/rejected": -1.743749976158142, "loss": 0.953, "nll_loss": 0.888476550579071, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07011719048023224, "rewards/margins": 0.10416565090417862, "rewards/rejected": -0.17446288466453552, "step": 4650 }, { "epoch": 0.35390165179419025, "grad_norm": 1.4890335733381965, "learning_rate": 1.1719174706180952e-06, "log_odds_chosen": 1.2297852039337158, "log_odds_ratio": -0.4527343809604645, "logits/chosen": -1.178125023841858, "logits/rejected": -1.0185546875, "logps/chosen": -0.721972644329071, "logps/rejected": -1.623046875, "loss": 0.928, "nll_loss": 0.892382800579071, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07222900539636612, "rewards/margins": 0.09012451022863388, "rewards/rejected": -0.162353515625, "step": 4660 }, { "epoch": 0.3546610973988988, "grad_norm": 1.9246040073431538, "learning_rate": 1.1706620685752386e-06, "log_odds_chosen": 1.0772583484649658, "log_odds_ratio": -0.49604493379592896, "logits/chosen": -1.1882812976837158, "logits/rejected": -1.056249976158142, "logps/chosen": -0.739453136920929, "logps/rejected": -1.4675781726837158, "loss": 0.9726, "nll_loss": 0.907031238079071, "rewards/accuracies": 0.71875, "rewards/chosen": -0.073974609375, "rewards/margins": 0.07270050048828125, "rewards/rejected": -0.14663085341453552, "step": 4670 }, { "epoch": 0.35542054300360737, "grad_norm": 1.7624214604627615, "learning_rate": 1.1694106924093723e-06, "log_odds_chosen": 1.167382836341858, "log_odds_ratio": -0.47270506620407104, "logits/chosen": -1.0593750476837158, "logits/rejected": -0.9267578125, "logps/chosen": -0.644824206829071, "logps/rejected": -1.433203101158142, "loss": 0.9375, "nll_loss": 0.9537109136581421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06448974460363388, "rewards/margins": 0.0787353515625, "rewards/rejected": -0.14321288466453552, "step": 4680 }, { "epoch": 0.35617998860831596, "grad_norm": 1.5466431602406214, "learning_rate": 1.1681633206491381e-06, "log_odds_chosen": 1.193359375, "log_odds_ratio": -0.4615234434604645, "logits/chosen": -1.113671898841858, "logits/rejected": -0.9619140625, "logps/chosen": -0.6957031488418579, "logps/rejected": -1.485937476158142, "loss": 0.9327, "nll_loss": 0.956250011920929, "rewards/accuracies": 0.75, "rewards/chosen": -0.06953124701976776, "rewards/margins": 0.07911376655101776, "rewards/rejected": -0.14885254204273224, "step": 4690 }, { "epoch": 0.3569394342130245, "grad_norm": 1.4759578896537362, "learning_rate": 1.1669199319831564e-06, "log_odds_chosen": 1.275720238685608, "log_odds_ratio": -0.4474121034145355, "logits/chosen": -1.149804711341858, "logits/rejected": -0.970703125, "logps/chosen": -0.732421875, "logps/rejected": -1.646875023841858, "loss": 0.9451, "nll_loss": 0.853222668170929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07318115234375, "rewards/margins": 0.09153594821691513, "rewards/rejected": -0.16459961235523224, "step": 4700 }, { "epoch": 0.3576988798177331, "grad_norm": 1.5810687915960624, "learning_rate": 1.1656805052584958e-06, "log_odds_chosen": 1.132959008216858, "log_odds_ratio": -0.47124022245407104, "logits/chosen": -1.176171898841858, "logits/rejected": -0.9994140863418579, "logps/chosen": -0.75830078125, "logps/rejected": -1.537109375, "loss": 0.9703, "nll_loss": 1.023046851158142, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0758056640625, "rewards/margins": 0.07791443169116974, "rewards/rejected": -0.15371093153953552, "step": 4710 }, { "epoch": 0.3584583254224416, "grad_norm": 1.6255352167606099, "learning_rate": 1.164445019479164e-06, "log_odds_chosen": 1.21533203125, "log_odds_ratio": -0.45405274629592896, "logits/chosen": -1.1896483898162842, "logits/rejected": -1.061132788658142, "logps/chosen": -0.7144531011581421, "logps/rejected": -1.5654296875, "loss": 0.9481, "nll_loss": 0.9422851800918579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07145996391773224, "rewards/margins": 0.08487548679113388, "rewards/rejected": -0.15639647841453552, "step": 4720 }, { "epoch": 0.3592177710271502, "grad_norm": 1.4223444696738041, "learning_rate": 1.1632134538046105e-06, "log_odds_chosen": 1.257226586341858, "log_odds_ratio": -0.43437498807907104, "logits/chosen": -1.179101586341858, "logits/rejected": -0.9671875238418579, "logps/chosen": -0.70703125, "logps/rejected": -1.62109375, "loss": 0.9253, "nll_loss": 0.9828125238418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.0706787109375, "rewards/margins": 0.09133605659008026, "rewards/rejected": -0.16196289658546448, "step": 4730 }, { "epoch": 0.3599772166318587, "grad_norm": 1.6660837768396182, "learning_rate": 1.1619857875482536e-06, "log_odds_chosen": 1.235742211341858, "log_odds_ratio": -0.48579102754592896, "logits/chosen": -1.168554663658142, "logits/rejected": -0.9505859613418579, "logps/chosen": -0.74658203125, "logps/rejected": -1.6533203125, "loss": 0.961, "nll_loss": 0.9427734613418579, "rewards/accuracies": 0.75, "rewards/chosen": -0.07467041164636612, "rewards/margins": 0.09065552055835724, "rewards/rejected": -0.165283203125, "step": 4740 }, { "epoch": 0.3607366622365673, "grad_norm": 1.6235208131160974, "learning_rate": 1.1607620001760185e-06, "log_odds_chosen": 1.3660156726837158, "log_odds_ratio": -0.3797363340854645, "logits/chosen": -1.193945288658142, "logits/rejected": -0.9873046875, "logps/chosen": -0.6846679449081421, "logps/rejected": -1.6179687976837158, "loss": 0.9511, "nll_loss": 0.8755859136581421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06845702975988388, "rewards/margins": 0.09342040866613388, "rewards/rejected": -0.16176757216453552, "step": 4750 }, { "epoch": 0.3614961078412759, "grad_norm": 1.4543149544377185, "learning_rate": 1.1595420713048968e-06, "log_odds_chosen": 1.1067383289337158, "log_odds_ratio": -0.469970703125, "logits/chosen": -1.2265625, "logits/rejected": -1.0625, "logps/chosen": -0.67724609375, "logps/rejected": -1.4396483898162842, "loss": 0.9441, "nll_loss": 0.87890625, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06771240383386612, "rewards/margins": 0.07618407905101776, "rewards/rejected": -0.14401856064796448, "step": 4760 }, { "epoch": 0.3622555534459844, "grad_norm": 1.6261619433320085, "learning_rate": 1.1583259807015182e-06, "log_odds_chosen": 1.2234375476837158, "log_odds_ratio": -0.43339842557907104, "logits/chosen": -1.1238281726837158, "logits/rejected": -0.942578136920929, "logps/chosen": -0.7392578125, "logps/rejected": -1.624414086341858, "loss": 0.9297, "nll_loss": 1.0236327648162842, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07396240532398224, "rewards/margins": 0.08838500827550888, "rewards/rejected": -0.16232910752296448, "step": 4770 }, { "epoch": 0.363014999050693, "grad_norm": 1.6273249242325887, "learning_rate": 1.1571137082807434e-06, "log_odds_chosen": 1.1051757335662842, "log_odds_ratio": -0.44877928495407104, "logits/chosen": -1.166406273841858, "logits/rejected": -1.080468773841858, "logps/chosen": -0.7197265625, "logps/rejected": -1.4679687023162842, "loss": 0.9385, "nll_loss": 0.933398425579071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07199706882238388, "rewards/margins": 0.07475586235523224, "rewards/rejected": -0.14670410752296448, "step": 4780 }, { "epoch": 0.36377444465540154, "grad_norm": 1.7390223773893867, "learning_rate": 1.155905234104269e-06, "log_odds_chosen": 1.408300757408142, "log_odds_ratio": -0.40971678495407104, "logits/chosen": -1.1375000476837158, "logits/rejected": -0.9976562261581421, "logps/chosen": -0.68359375, "logps/rejected": -1.6892578601837158, "loss": 0.9362, "nll_loss": 0.8853515386581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06832275539636612, "rewards/margins": 0.10059203952550888, "rewards/rejected": -0.16892090439796448, "step": 4790 }, { "epoch": 0.3645338902601101, "grad_norm": 1.446753875092688, "learning_rate": 1.1547005383792516e-06, "log_odds_chosen": 1.1369140148162842, "log_odds_ratio": -0.4607910215854645, "logits/chosen": -1.2023437023162842, "logits/rejected": -0.998828113079071, "logps/chosen": -0.722460925579071, "logps/rejected": -1.512109398841858, "loss": 0.9348, "nll_loss": 0.882617175579071, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07230224460363388, "rewards/margins": 0.07890625298023224, "rewards/rejected": -0.1512451171875, "step": 4800 }, { "epoch": 0.36529333586481866, "grad_norm": 2.3061320607527134, "learning_rate": 1.1534996014569446e-06, "log_odds_chosen": 1.3888671398162842, "log_odds_ratio": -0.405029296875, "logits/chosen": -1.163476586341858, "logits/rejected": -0.974804699420929, "logps/chosen": -0.699414074420929, "logps/rejected": -1.6941406726837158, "loss": 0.9629, "nll_loss": 0.85400390625, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06989745795726776, "rewards/margins": 0.09962157905101776, "rewards/rejected": -0.16943359375, "step": 4810 }, { "epoch": 0.36605278146952724, "grad_norm": 1.5479083912086051, "learning_rate": 1.1523024038313547e-06, "log_odds_chosen": 1.215478539466858, "log_odds_ratio": -0.49238282442092896, "logits/chosen": -1.215234398841858, "logits/rejected": -1.06640625, "logps/chosen": -0.716503918170929, "logps/rejected": -1.597265601158142, "loss": 0.943, "nll_loss": 0.8203125, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07177734375, "rewards/margins": 0.08805236965417862, "rewards/rejected": -0.15976563096046448, "step": 4820 }, { "epoch": 0.36681222707423583, "grad_norm": 1.3063283747857406, "learning_rate": 1.1511089261379083e-06, "log_odds_chosen": 1.141015648841858, "log_odds_ratio": -0.45112305879592896, "logits/chosen": -1.12890625, "logits/rejected": -0.9736328125, "logps/chosen": -0.676562488079071, "logps/rejected": -1.450781226158142, "loss": 0.9185, "nll_loss": 0.947070300579071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06770019233226776, "rewards/margins": 0.07735595852136612, "rewards/rejected": -0.14501953125, "step": 4830 }, { "epoch": 0.36757167267894436, "grad_norm": 1.7877584901110606, "learning_rate": 1.149919149152138e-06, "log_odds_chosen": 0.9994140863418579, "log_odds_ratio": -0.50732421875, "logits/chosen": -1.259374976158142, "logits/rejected": -1.101953148841858, "logps/chosen": -0.7210937738418579, "logps/rejected": -1.4314453601837158, "loss": 0.9344, "nll_loss": 0.8267577886581421, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.07210693508386612, "rewards/margins": 0.0710906982421875, "rewards/rejected": -0.14309081435203552, "step": 4840 }, { "epoch": 0.36833111828365295, "grad_norm": 1.762510919442105, "learning_rate": 1.148733053788381e-06, "log_odds_chosen": 1.20123291015625, "log_odds_ratio": -0.44794923067092896, "logits/chosen": -1.078125, "logits/rejected": -0.928515613079071, "logps/chosen": -0.7210937738418579, "logps/rejected": -1.592187523841858, "loss": 0.9406, "nll_loss": 0.9507812261581421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07207031548023224, "rewards/margins": 0.08710022270679474, "rewards/rejected": -0.1591796875, "step": 4850 }, { "epoch": 0.3690905638883615, "grad_norm": 1.5479072377631165, "learning_rate": 1.1475506210984938e-06, "log_odds_chosen": 1.2771728038787842, "log_odds_ratio": -0.466796875, "logits/chosen": -1.1865234375, "logits/rejected": -1.022070288658142, "logps/chosen": -0.70947265625, "logps/rejected": -1.586523413658142, "loss": 0.9376, "nll_loss": 0.9224609136581421, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07097168266773224, "rewards/margins": 0.08756408840417862, "rewards/rejected": -0.15854492783546448, "step": 4860 }, { "epoch": 0.36985000949307006, "grad_norm": 2.181063013910603, "learning_rate": 1.1463718322705807e-06, "log_odds_chosen": 1.5302734375, "log_odds_ratio": -0.3770507872104645, "logits/chosen": -1.135156273841858, "logits/rejected": -0.969921886920929, "logps/chosen": -0.719531238079071, "logps/rejected": -1.8269531726837158, "loss": 0.9405, "nll_loss": 0.916796863079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07192382961511612, "rewards/margins": 0.11070556938648224, "rewards/rejected": -0.1826171875, "step": 4870 }, { "epoch": 0.3706094550977786, "grad_norm": 1.4851113910055338, "learning_rate": 1.1451966686277364e-06, "log_odds_chosen": 1.1806151866912842, "log_odds_ratio": -0.43964844942092896, "logits/chosen": -1.16015625, "logits/rejected": -0.9925781488418579, "logps/chosen": -0.667773425579071, "logps/rejected": -1.4943358898162842, "loss": 0.9469, "nll_loss": 0.8753906488418579, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.06673584133386612, "rewards/margins": 0.08252258598804474, "rewards/rejected": -0.14936523139476776, "step": 4880 }, { "epoch": 0.3713689007024872, "grad_norm": 1.6834093091088522, "learning_rate": 1.1440251116268034e-06, "log_odds_chosen": 1.026220679283142, "log_odds_ratio": -0.4849609434604645, "logits/chosen": -1.18359375, "logits/rejected": -1.037500023841858, "logps/chosen": -0.706835925579071, "logps/rejected": -1.430078148841858, "loss": 0.9481, "nll_loss": 0.947070300579071, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07065429538488388, "rewards/margins": 0.07236786186695099, "rewards/rejected": -0.14306640625, "step": 4890 }, { "epoch": 0.37212834630719577, "grad_norm": 1.8438799621552189, "learning_rate": 1.1428571428571428e-06, "log_odds_chosen": 1.099829077720642, "log_odds_ratio": -0.47883301973342896, "logits/chosen": -1.153906226158142, "logits/rejected": -1.0275390148162842, "logps/chosen": -0.7623046636581421, "logps/rejected": -1.5166015625, "loss": 0.9512, "nll_loss": 0.8984375, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07631836086511612, "rewards/margins": 0.07546691596508026, "rewards/rejected": -0.15158692002296448, "step": 4900 }, { "epoch": 0.3728877919119043, "grad_norm": 1.5731590346074515, "learning_rate": 1.14169274403942e-06, "log_odds_chosen": 1.2581055164337158, "log_odds_ratio": -0.451904296875, "logits/chosen": -1.129296898841858, "logits/rejected": -0.992968738079071, "logps/chosen": -0.7476562261581421, "logps/rejected": -1.666015625, "loss": 0.9649, "nll_loss": 0.908007800579071, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07473144680261612, "rewards/margins": 0.09201355278491974, "rewards/rejected": -0.16672363877296448, "step": 4910 }, { "epoch": 0.3736472375166129, "grad_norm": 2.1441226045268422, "learning_rate": 1.140531897024402e-06, "log_odds_chosen": 1.388281226158142, "log_odds_ratio": -0.39763182401657104, "logits/chosen": -1.220312476158142, "logits/rejected": -1.0398437976837158, "logps/chosen": -0.6805664300918579, "logps/rejected": -1.640625, "loss": 0.9555, "nll_loss": 0.892578125, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06805419921875, "rewards/margins": 0.09611816704273224, "rewards/rejected": -0.16408690810203552, "step": 4920 }, { "epoch": 0.3744066831213214, "grad_norm": 1.9477788219991834, "learning_rate": 1.13937458379177e-06, "log_odds_chosen": 1.20166015625, "log_odds_ratio": -0.4423828125, "logits/chosen": -1.1105468273162842, "logits/rejected": -0.9271484613418579, "logps/chosen": -0.758007824420929, "logps/rejected": -1.555078148841858, "loss": 0.9334, "nll_loss": 0.914843738079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07576904445886612, "rewards/margins": 0.07977294921875, "rewards/rejected": -0.15549317002296448, "step": 4930 }, { "epoch": 0.37516612872603, "grad_norm": 1.7685899086781558, "learning_rate": 1.1382207864489444e-06, "log_odds_chosen": 1.3769409656524658, "log_odds_ratio": -0.41053467988967896, "logits/chosen": -1.169335961341858, "logits/rejected": -0.971484363079071, "logps/chosen": -0.688183605670929, "logps/rejected": -1.6378905773162842, "loss": 0.943, "nll_loss": 0.9007812738418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06882324069738388, "rewards/margins": 0.09494171291589737, "rewards/rejected": -0.16376952826976776, "step": 4940 }, { "epoch": 0.3759255743307386, "grad_norm": 1.7351815548560279, "learning_rate": 1.1370704872299223e-06, "log_odds_chosen": 1.39453125, "log_odds_ratio": -0.41552734375, "logits/chosen": -1.1533203125, "logits/rejected": -0.9150390625, "logps/chosen": -0.7037109136581421, "logps/rejected": -1.689062476158142, "loss": 0.9215, "nll_loss": 0.8550781011581421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0704345703125, "rewards/margins": 0.09841308742761612, "rewards/rejected": -0.16875000298023224, "step": 4950 }, { "epoch": 0.3766850199354471, "grad_norm": 1.593554351981433, "learning_rate": 1.1359236684941295e-06, "log_odds_chosen": 1.3957030773162842, "log_odds_ratio": -0.3798828125, "logits/chosen": -1.142968773841858, "logits/rejected": -0.956250011920929, "logps/chosen": -0.73486328125, "logps/rejected": -1.728906273841858, "loss": 0.9734, "nll_loss": 0.980273425579071, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07362060248851776, "rewards/margins": 0.09926757961511612, "rewards/rejected": -0.17270508408546448, "step": 4960 }, { "epoch": 0.3774444655401557, "grad_norm": 1.4804908377282058, "learning_rate": 1.1347803127252839e-06, "log_odds_chosen": 1.5822265148162842, "log_odds_ratio": -0.3753418028354645, "logits/chosen": -1.141992211341858, "logits/rejected": -0.999804675579071, "logps/chosen": -0.6552734375, "logps/rejected": -1.796484351158142, "loss": 0.9387, "nll_loss": 0.8773437738418579, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06556396186351776, "rewards/margins": 0.11414794623851776, "rewards/rejected": -0.17973633110523224, "step": 4970 }, { "epoch": 0.37820391114486424, "grad_norm": 1.7363577267442125, "learning_rate": 1.1336404025302715e-06, "log_odds_chosen": 1.176904320716858, "log_odds_ratio": -0.4439941346645355, "logits/chosen": -1.105078101158142, "logits/rejected": -0.9593750238418579, "logps/chosen": -0.7603515386581421, "logps/rejected": -1.6023437976837158, "loss": 0.945, "nll_loss": 0.9560546875, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07609863579273224, "rewards/margins": 0.08417968451976776, "rewards/rejected": -0.16035155951976776, "step": 4980 }, { "epoch": 0.3789633567495728, "grad_norm": 2.0958208432243826, "learning_rate": 1.1325039206380352e-06, "log_odds_chosen": 1.1557128429412842, "log_odds_ratio": -0.4334472715854645, "logits/chosen": -1.1201171875, "logits/rejected": -0.9716796875, "logps/chosen": -0.737988293170929, "logps/rejected": -1.486328125, "loss": 0.9648, "nll_loss": 0.916796863079071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07380370795726776, "rewards/margins": 0.07482452690601349, "rewards/rejected": -0.14877930283546448, "step": 4990 }, { "epoch": 0.37972280235428135, "grad_norm": 1.5427495370501891, "learning_rate": 1.131370849898476e-06, "log_odds_chosen": 1.327246069908142, "log_odds_ratio": -0.4297851622104645, "logits/chosen": -1.1953125, "logits/rejected": -1.019921898841858, "logps/chosen": -0.771289050579071, "logps/rejected": -1.734765648841858, "loss": 0.9792, "nll_loss": 0.9002929925918579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07713623344898224, "rewards/margins": 0.09630127251148224, "rewards/rejected": -0.17343750596046448, "step": 5000 }, { "epoch": 0.37972280235428135, "eval_log_odds_chosen": 1.0117181539535522, "eval_log_odds_ratio": -0.49931100010871887, "eval_logits/chosen": -1.1310417652130127, "eval_logits/rejected": -0.9617669582366943, "eval_logps/chosen": -0.7886484265327454, "eval_logps/rejected": -1.522111415863037, "eval_loss": 1.171636939048767, "eval_nll_loss": 1.1251970529556274, "eval_rewards/accuracies": 0.7246080636978149, "eval_rewards/chosen": -0.07886141538619995, "eval_rewards/margins": 0.07333838939666748, "eval_rewards/rejected": -0.1522044539451599, "eval_runtime": 1688.0231, "eval_samples_per_second": 55.596, "eval_steps_per_second": 0.869, "step": 5000 }, { "epoch": 0.38048224795898994, "grad_norm": 1.5291579832948565, "learning_rate": 1.130241173281366e-06, "log_odds_chosen": 1.1808593273162842, "log_odds_ratio": -0.4474121034145355, "logits/chosen": -1.1941406726837158, "logits/rejected": -1.0417969226837158, "logps/chosen": -0.734375, "logps/rejected": -1.5730469226837158, "loss": 0.934, "nll_loss": 0.9164062738418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07352294772863388, "rewards/margins": 0.0838775634765625, "rewards/rejected": -0.15729980170726776, "step": 5010 }, { "epoch": 0.3812416935636985, "grad_norm": 1.560589915919458, "learning_rate": 1.1291148738752732e-06, "log_odds_chosen": 1.2580077648162842, "log_odds_ratio": -0.40131837129592896, "logits/chosen": -1.1435546875, "logits/rejected": -1.034765601158142, "logps/chosen": -0.68798828125, "logps/rejected": -1.564062476158142, "loss": 0.9442, "nll_loss": 0.8638671636581421, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06875000149011612, "rewards/margins": 0.087493896484375, "rewards/rejected": -0.15642090141773224, "step": 5020 }, { "epoch": 0.38200113916840706, "grad_norm": 1.8875962499468777, "learning_rate": 1.1279919348864981e-06, "log_odds_chosen": 1.5498046875, "log_odds_ratio": -0.3501220643520355, "logits/chosen": -1.1710937023162842, "logits/rejected": -0.9964843988418579, "logps/chosen": -0.6998046636581421, "logps/rejected": -1.781640648841858, "loss": 0.9483, "nll_loss": 0.906445324420929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07000732421875, "rewards/margins": 0.10820312798023224, "rewards/rejected": -0.17812499403953552, "step": 5030 }, { "epoch": 0.38276058477311564, "grad_norm": 1.4309179484540977, "learning_rate": 1.126872339638022e-06, "log_odds_chosen": 1.4729492664337158, "log_odds_ratio": -0.4010253846645355, "logits/chosen": -1.1671874523162842, "logits/rejected": -0.9750000238418579, "logps/chosen": -0.7154296636581421, "logps/rejected": -1.755859375, "loss": 0.9423, "nll_loss": 0.8667968511581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07154540717601776, "rewards/margins": 0.1041259765625, "rewards/rejected": -0.17543944716453552, "step": 5040 }, { "epoch": 0.3835200303778242, "grad_norm": 2.4851754364859113, "learning_rate": 1.1257560715684668e-06, "log_odds_chosen": 1.385156273841858, "log_odds_ratio": -0.38691407442092896, "logits/chosen": -1.1013672351837158, "logits/rejected": -0.9814453125, "logps/chosen": -0.661914050579071, "logps/rejected": -1.617578148841858, "loss": 0.9383, "nll_loss": 0.8646484613418579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06624756008386612, "rewards/margins": 0.09547118842601776, "rewards/rejected": -0.16164550185203552, "step": 5050 }, { "epoch": 0.38427947598253276, "grad_norm": 1.5138291473715648, "learning_rate": 1.1246431142310665e-06, "log_odds_chosen": 1.57861328125, "log_odds_ratio": -0.3837890625, "logits/chosen": -1.116601586341858, "logits/rejected": -0.9111328125, "logps/chosen": -0.7225586175918579, "logps/rejected": -1.869531273841858, "loss": 0.9801, "nll_loss": 0.9515625238418579, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07230224460363388, "rewards/margins": 0.11476440727710724, "rewards/rejected": -0.18698731064796448, "step": 5060 }, { "epoch": 0.3850389215872413, "grad_norm": 1.485637234135261, "learning_rate": 1.1235334512926484e-06, "log_odds_chosen": 1.4617187976837158, "log_odds_ratio": -0.4024414122104645, "logits/chosen": -1.1962890625, "logits/rejected": -0.940625011920929, "logps/chosen": -0.7435547113418579, "logps/rejected": -1.7937500476837158, "loss": 0.9548, "nll_loss": 0.907031238079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07429198920726776, "rewards/margins": 0.10505370795726776, "rewards/rejected": -0.17939452826976776, "step": 5070 }, { "epoch": 0.3857983671919499, "grad_norm": 1.7384504030572465, "learning_rate": 1.1224270665326274e-06, "log_odds_chosen": 1.357275366783142, "log_odds_ratio": -0.4326171875, "logits/chosen": -1.2001953125, "logits/rejected": -1.0398437976837158, "logps/chosen": -0.721875011920929, "logps/rejected": -1.7244141101837158, "loss": 0.9528, "nll_loss": 0.9046875238418579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07224120944738388, "rewards/margins": 0.10030975192785263, "rewards/rejected": -0.17258301377296448, "step": 5080 }, { "epoch": 0.38655781279665846, "grad_norm": 1.8827019389810915, "learning_rate": 1.12132394384201e-06, "log_odds_chosen": 1.526831030845642, "log_odds_ratio": -0.3536621034145355, "logits/chosen": -1.239648461341858, "logits/rejected": -0.9925781488418579, "logps/chosen": -0.6944335699081421, "logps/rejected": -1.7839844226837158, "loss": 0.9291, "nll_loss": 0.8833984136581421, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06939697265625, "rewards/margins": 0.10917969048023224, "rewards/rejected": -0.17854003608226776, "step": 5090 }, { "epoch": 0.387317258401367, "grad_norm": 1.4883044699285815, "learning_rate": 1.1202240672224076e-06, "log_odds_chosen": 1.5398437976837158, "log_odds_ratio": -0.3570800721645355, "logits/chosen": -1.1472656726837158, "logits/rejected": -0.9888671636581421, "logps/chosen": -0.6875976324081421, "logps/rejected": -1.764062523841858, "loss": 0.9475, "nll_loss": 0.8623046875, "rewards/accuracies": 0.84375, "rewards/chosen": -0.0687255859375, "rewards/margins": 0.10764160007238388, "rewards/rejected": -0.17636719346046448, "step": 5100 }, { "epoch": 0.3880767040060756, "grad_norm": 1.7516606501834784, "learning_rate": 1.1191274207850654e-06, "log_odds_chosen": 1.2722656726837158, "log_odds_ratio": -0.43671876192092896, "logits/chosen": -1.237890601158142, "logits/rejected": -1.061914086341858, "logps/chosen": -0.6966797113418579, "logps/rejected": -1.5763671398162842, "loss": 0.9393, "nll_loss": 0.8890625238418579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.06967773288488388, "rewards/margins": 0.088043212890625, "rewards/rejected": -0.15778808295726776, "step": 5110 }, { "epoch": 0.3888361496107841, "grad_norm": 1.840141475569147, "learning_rate": 1.1180339887498948e-06, "log_odds_chosen": 1.2108886241912842, "log_odds_ratio": -0.4476562440395355, "logits/chosen": -1.1658203601837158, "logits/rejected": -0.984179675579071, "logps/chosen": -0.71435546875, "logps/rejected": -1.5496094226837158, "loss": 0.9582, "nll_loss": 0.9365234375, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07139892876148224, "rewards/margins": 0.08355712890625, "rewards/rejected": -0.155029296875, "step": 5120 }, { "epoch": 0.3895955952154927, "grad_norm": 1.7002064239482768, "learning_rate": 1.1169437554445213e-06, "log_odds_chosen": 1.384924292564392, "log_odds_ratio": -0.4324707090854645, "logits/chosen": -1.180273413658142, "logits/rejected": -0.991406261920929, "logps/chosen": -0.7503906488418579, "logps/rejected": -1.763281226158142, "loss": 0.9647, "nll_loss": 0.9771484136581421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07503662258386612, "rewards/margins": 0.10137252509593964, "rewards/rejected": -0.1763916015625, "step": 5130 }, { "epoch": 0.3903550408202012, "grad_norm": 2.0661773098266565, "learning_rate": 1.1158567053033413e-06, "log_odds_chosen": 1.494140625, "log_odds_ratio": -0.35986328125, "logits/chosen": -1.203125, "logits/rejected": -0.9916015863418579, "logps/chosen": -0.677929699420929, "logps/rejected": -1.7019531726837158, "loss": 0.9348, "nll_loss": 0.8548828363418579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06766357272863388, "rewards/margins": 0.10247802734375, "rewards/rejected": -0.17026367783546448, "step": 5140 }, { "epoch": 0.3911144864249098, "grad_norm": 1.7536400307783535, "learning_rate": 1.1147728228665882e-06, "log_odds_chosen": 1.172888159751892, "log_odds_ratio": -0.4712890684604645, "logits/chosen": -1.1501953601837158, "logits/rejected": -1.0027344226837158, "logps/chosen": -0.7132812738418579, "logps/rejected": -1.5261719226837158, "loss": 0.9433, "nll_loss": 0.9310547113418579, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07130126655101776, "rewards/margins": 0.08122710883617401, "rewards/rejected": -0.15261229872703552, "step": 5150 }, { "epoch": 0.3918739320296184, "grad_norm": 1.5530567568249345, "learning_rate": 1.1136920927794092e-06, "log_odds_chosen": 1.236328125, "log_odds_ratio": -0.4305664002895355, "logits/chosen": -1.1134765148162842, "logits/rejected": -0.992968738079071, "logps/chosen": -0.697460949420929, "logps/rejected": -1.5330078601837158, "loss": 0.9128, "nll_loss": 0.8453124761581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06975097954273224, "rewards/margins": 0.08349609375, "rewards/rejected": -0.15314941108226776, "step": 5160 }, { "epoch": 0.39263337763432693, "grad_norm": 2.0044264403012533, "learning_rate": 1.1126144997909508e-06, "log_odds_chosen": 1.3826172351837158, "log_odds_ratio": -0.43867188692092896, "logits/chosen": -1.1931641101837158, "logits/rejected": -1.0242187976837158, "logps/chosen": -0.7300781011581421, "logps/rejected": -1.692773461341858, "loss": 0.9385, "nll_loss": 0.893750011920929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07296142727136612, "rewards/margins": 0.09624938666820526, "rewards/rejected": -0.16928711533546448, "step": 5170 }, { "epoch": 0.3933928232390355, "grad_norm": 1.87566416547275, "learning_rate": 1.1115400287534568e-06, "log_odds_chosen": 1.4027831554412842, "log_odds_ratio": -0.4148925840854645, "logits/chosen": -1.1710937023162842, "logits/rejected": -1.0226562023162842, "logps/chosen": -0.706347644329071, "logps/rejected": -1.6902344226837158, "loss": 0.9373, "nll_loss": 0.9341796636581421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07062987983226776, "rewards/margins": 0.09815368801355362, "rewards/rejected": -0.1688232421875, "step": 5180 }, { "epoch": 0.39415226884374405, "grad_norm": 1.7871394982494075, "learning_rate": 1.110468664621372e-06, "log_odds_chosen": 1.4841797351837158, "log_odds_ratio": -0.38129884004592896, "logits/chosen": -1.283593773841858, "logits/rejected": -1.063085913658142, "logps/chosen": -0.7044922113418579, "logps/rejected": -1.779296875, "loss": 0.9209, "nll_loss": 0.8935546875, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.0704345703125, "rewards/margins": 0.10742797702550888, "rewards/rejected": -0.17790527641773224, "step": 5190 }, { "epoch": 0.39491171444845263, "grad_norm": 1.3982750571641764, "learning_rate": 1.1094003924504583e-06, "log_odds_chosen": 1.0552246570587158, "log_odds_ratio": -0.5071777105331421, "logits/chosen": -1.228124976158142, "logits/rejected": -1.0246093273162842, "logps/chosen": -0.8226562738418579, "logps/rejected": -1.5763671398162842, "loss": 0.9393, "nll_loss": 0.9847656488418579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.08222655951976776, "rewards/margins": 0.07537841796875, "rewards/rejected": -0.15751953423023224, "step": 5200 }, { "epoch": 0.3956711600531612, "grad_norm": 2.0627212335765797, "learning_rate": 1.1083351973969191e-06, "log_odds_chosen": 1.1569335460662842, "log_odds_ratio": -0.457763671875, "logits/chosen": -1.0945312976837158, "logits/rejected": -0.9361327886581421, "logps/chosen": -0.69091796875, "logps/rejected": -1.513281226158142, "loss": 0.9146, "nll_loss": 0.8505859375, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06903076171875, "rewards/margins": 0.08240356296300888, "rewards/rejected": -0.15146484971046448, "step": 5210 }, { "epoch": 0.39643060565786975, "grad_norm": 1.444450309624864, "learning_rate": 1.107273064716533e-06, "log_odds_chosen": 0.9994872808456421, "log_odds_ratio": -0.5115722417831421, "logits/chosen": -1.2248046398162842, "logits/rejected": -1.0294921398162842, "logps/chosen": -0.792187511920929, "logps/rejected": -1.4953124523162842, "loss": 0.9282, "nll_loss": 0.852734386920929, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07917480170726776, "rewards/margins": 0.07033081352710724, "rewards/rejected": -0.14948730170726776, "step": 5220 }, { "epoch": 0.39719005126257834, "grad_norm": 1.3865849800319912, "learning_rate": 1.1062139797637962e-06, "log_odds_chosen": 1.248437523841858, "log_odds_ratio": -0.44194334745407104, "logits/chosen": -1.07421875, "logits/rejected": -0.9458984136581421, "logps/chosen": -0.6810547113418579, "logps/rejected": -1.566796898841858, "loss": 0.9441, "nll_loss": 0.942578136920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06804199516773224, "rewards/margins": 0.08867187798023224, "rewards/rejected": -0.15676268935203552, "step": 5230 }, { "epoch": 0.39794949686728687, "grad_norm": 1.6405100422449344, "learning_rate": 1.1051579279910751e-06, "log_odds_chosen": 1.363623023033142, "log_odds_ratio": -0.417236328125, "logits/chosen": -1.011132836341858, "logits/rejected": -0.9072265625, "logps/chosen": -0.7138671875, "logps/rejected": -1.6875, "loss": 0.9057, "nll_loss": 0.894726574420929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07139892876148224, "rewards/margins": 0.09736938774585724, "rewards/rejected": -0.16875000298023224, "step": 5240 }, { "epoch": 0.39870894247199545, "grad_norm": 1.466264549348693, "learning_rate": 1.1041048949477667e-06, "log_odds_chosen": 1.39599609375, "log_odds_ratio": -0.44926756620407104, "logits/chosen": -1.1472656726837158, "logits/rejected": -0.9935547113418579, "logps/chosen": -0.72265625, "logps/rejected": -1.693750023841858, "loss": 0.9349, "nll_loss": 0.877734363079071, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.072265625, "rewards/margins": 0.09720458835363388, "rewards/rejected": -0.16933593153953552, "step": 5250 }, { "epoch": 0.399468388076704, "grad_norm": 1.3982551211554097, "learning_rate": 1.1030548662794673e-06, "log_odds_chosen": 1.290624976158142, "log_odds_ratio": -0.42231446504592896, "logits/chosen": -1.1642577648162842, "logits/rejected": -0.9496093988418579, "logps/chosen": -0.716601550579071, "logps/rejected": -1.6453125476837158, "loss": 0.9227, "nll_loss": 0.8636718988418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07167968899011612, "rewards/margins": 0.09300537407398224, "rewards/rejected": -0.16459961235523224, "step": 5260 }, { "epoch": 0.40022783368141257, "grad_norm": 1.8411713792847049, "learning_rate": 1.102007827727152e-06, "log_odds_chosen": 0.9326171875, "log_odds_ratio": -0.49072265625, "logits/chosen": -1.1396484375, "logits/rejected": -0.9779297113418579, "logps/chosen": -0.7240234613418579, "logps/rejected": -1.3351562023162842, "loss": 0.9425, "nll_loss": 0.913281261920929, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07237549126148224, "rewards/margins": 0.06107483059167862, "rewards/rejected": -0.1334228515625, "step": 5270 }, { "epoch": 0.40098727928612116, "grad_norm": 3.5090253346697065, "learning_rate": 1.1009637651263607e-06, "log_odds_chosen": 1.37744140625, "log_odds_ratio": -0.40458983182907104, "logits/chosen": -1.2080078125, "logits/rejected": -1.0085937976837158, "logps/chosen": -0.712695300579071, "logps/rejected": -1.6697266101837158, "loss": 0.9678, "nll_loss": 0.91796875, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07126464694738388, "rewards/margins": 0.095672607421875, "rewards/rejected": -0.16701659560203552, "step": 5280 }, { "epoch": 0.4017467248908297, "grad_norm": 1.4314601344936848, "learning_rate": 1.0999226644063927e-06, "log_odds_chosen": 1.638281226158142, "log_odds_ratio": -0.3639160096645355, "logits/chosen": -1.1769530773162842, "logits/rejected": -1.021484375, "logps/chosen": -0.702343761920929, "logps/rejected": -1.8761718273162842, "loss": 0.9434, "nll_loss": 0.867382824420929, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07023926079273224, "rewards/margins": 0.11727295070886612, "rewards/rejected": -0.18757323920726776, "step": 5290 }, { "epoch": 0.4025061704955383, "grad_norm": 1.7190151041985229, "learning_rate": 1.0988845115895123e-06, "log_odds_chosen": 1.1351807117462158, "log_odds_ratio": -0.47563475370407104, "logits/chosen": -1.152734398841858, "logits/rejected": -0.9720703363418579, "logps/chosen": -0.7865234613418579, "logps/rejected": -1.58203125, "loss": 0.9537, "nll_loss": 0.9212890863418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07860107719898224, "rewards/margins": 0.079437255859375, "rewards/rejected": -0.15815429389476776, "step": 5300 }, { "epoch": 0.4032656161002468, "grad_norm": 1.4685863649479254, "learning_rate": 1.0978492927901574e-06, "log_odds_chosen": 1.265527367591858, "log_odds_ratio": -0.4730468690395355, "logits/chosen": -1.0509765148162842, "logits/rejected": -0.941210925579071, "logps/chosen": -0.6880859136581421, "logps/rejected": -1.594335913658142, "loss": 0.9261, "nll_loss": 0.908984363079071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06878662109375, "rewards/margins": 0.09077148139476776, "rewards/rejected": -0.15959472954273224, "step": 5310 }, { "epoch": 0.4040250617049554, "grad_norm": 1.7062229643964917, "learning_rate": 1.0968169942141634e-06, "log_odds_chosen": 1.437597632408142, "log_odds_ratio": -0.4586425721645355, "logits/chosen": -1.128320336341858, "logits/rejected": -0.989453136920929, "logps/chosen": -0.7474609613418579, "logps/rejected": -1.8074219226837158, "loss": 0.9469, "nll_loss": 0.998046875, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07470703125, "rewards/margins": 0.10600586235523224, "rewards/rejected": -0.18068847060203552, "step": 5320 }, { "epoch": 0.4047845073096639, "grad_norm": 1.566188954790269, "learning_rate": 1.0957876021579874e-06, "log_odds_chosen": 1.5320312976837158, "log_odds_ratio": -0.416259765625, "logits/chosen": -1.165624976158142, "logits/rejected": -1.030859351158142, "logps/chosen": -0.6756836175918579, "logps/rejected": -1.7578125, "loss": 0.9517, "nll_loss": 0.78564453125, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06756591796875, "rewards/margins": 0.10819091647863388, "rewards/rejected": -0.17561034858226776, "step": 5330 }, { "epoch": 0.4055439529143725, "grad_norm": 1.6293163622923572, "learning_rate": 1.0947611030079466e-06, "log_odds_chosen": 1.3927733898162842, "log_odds_ratio": -0.4186035096645355, "logits/chosen": -1.189843773841858, "logits/rejected": -0.9574218988418579, "logps/chosen": -0.758984386920929, "logps/rejected": -1.7820312976837158, "loss": 0.9476, "nll_loss": 0.890429675579071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07584228366613388, "rewards/margins": 0.10230712592601776, "rewards/rejected": -0.1781005859375, "step": 5340 }, { "epoch": 0.4063033985190811, "grad_norm": 1.5274440174764952, "learning_rate": 1.0937374832394612e-06, "log_odds_chosen": 1.2087891101837158, "log_odds_ratio": -0.43657225370407104, "logits/chosen": -1.169921875, "logits/rejected": -1.0048828125, "logps/chosen": -0.7099609375, "logps/rejected": -1.545312523841858, "loss": 0.9409, "nll_loss": 0.908984363079071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07095947116613388, "rewards/margins": 0.08346863090991974, "rewards/rejected": -0.15451660752296448, "step": 5350 }, { "epoch": 0.4070628441237896, "grad_norm": 1.6941853458805771, "learning_rate": 1.092716729416306e-06, "log_odds_chosen": 1.05126953125, "log_odds_ratio": -0.4723144471645355, "logits/chosen": -1.1611328125, "logits/rejected": -1.0341796875, "logps/chosen": -0.7579101324081421, "logps/rejected": -1.4988281726837158, "loss": 0.8898, "nll_loss": 0.87890625, "rewards/accuracies": 0.75, "rewards/chosen": -0.07578124850988388, "rewards/margins": 0.07416991889476776, "rewards/rejected": -0.14992675185203552, "step": 5360 }, { "epoch": 0.4078222897284982, "grad_norm": 1.6810072674065315, "learning_rate": 1.0916988281898703e-06, "log_odds_chosen": 1.2433593273162842, "log_odds_ratio": -0.4471679627895355, "logits/chosen": -1.2380859851837158, "logits/rejected": -1.0138671398162842, "logps/chosen": -0.704296886920929, "logps/rejected": -1.566992163658142, "loss": 0.9284, "nll_loss": 0.8935546875, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07039795070886612, "rewards/margins": 0.08627166599035263, "rewards/rejected": -0.15666504204273224, "step": 5370 }, { "epoch": 0.40858173533320674, "grad_norm": 1.8574402445891756, "learning_rate": 1.0906837662984237e-06, "log_odds_chosen": 1.023535132408142, "log_odds_ratio": -0.48198240995407104, "logits/chosen": -1.171289086341858, "logits/rejected": -1.006445288658142, "logps/chosen": -0.7088867425918579, "logps/rejected": -1.3953125476837158, "loss": 0.9257, "nll_loss": 0.8958984613418579, "rewards/accuracies": 0.75, "rewards/chosen": -0.07084961235523224, "rewards/margins": 0.06867065280675888, "rewards/rejected": -0.13945312798023224, "step": 5380 }, { "epoch": 0.4093411809379153, "grad_norm": 1.3795196142572261, "learning_rate": 1.089671530566391e-06, "log_odds_chosen": 1.2458984851837158, "log_odds_ratio": -0.4473632872104645, "logits/chosen": -1.1716797351837158, "logits/rejected": -0.992968738079071, "logps/chosen": -0.7279297113418579, "logps/rejected": -1.6033203601837158, "loss": 0.9266, "nll_loss": 0.88427734375, "rewards/accuracies": 0.75, "rewards/chosen": -0.07285156100988388, "rewards/margins": 0.08739318698644638, "rewards/rejected": -0.16020508110523224, "step": 5390 }, { "epoch": 0.41010062654262386, "grad_norm": 1.7205772061692497, "learning_rate": 1.0886621079036346e-06, "log_odds_chosen": 1.399999976158142, "log_odds_ratio": -0.433349609375, "logits/chosen": -1.2673828601837158, "logits/rejected": -1.0224609375, "logps/chosen": -0.7490234375, "logps/rejected": -1.7527344226837158, "loss": 0.9431, "nll_loss": 0.951953113079071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07486572116613388, "rewards/margins": 0.10039062798023224, "rewards/rejected": -0.17524413764476776, "step": 5400 }, { "epoch": 0.41086007214733244, "grad_norm": 1.5227897237652233, "learning_rate": 1.0876554853047417e-06, "log_odds_chosen": 1.2351562976837158, "log_odds_ratio": -0.41254884004592896, "logits/chosen": -1.0681641101837158, "logits/rejected": -0.9375, "logps/chosen": -0.6728515625, "logps/rejected": -1.5380859375, "loss": 0.937, "nll_loss": 0.83203125, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06727294623851776, "rewards/margins": 0.08648681640625, "rewards/rejected": -0.15383300185203552, "step": 5410 }, { "epoch": 0.41161951775204103, "grad_norm": 2.0576317590521302, "learning_rate": 1.0866516498483225e-06, "log_odds_chosen": 1.275976538658142, "log_odds_ratio": -0.41303712129592896, "logits/chosen": -1.243554711341858, "logits/rejected": -1.0242187976837158, "logps/chosen": -0.711132824420929, "logps/rejected": -1.620703101158142, "loss": 0.9408, "nll_loss": 0.9078124761581421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07110595703125, "rewards/margins": 0.0910797119140625, "rewards/rejected": -0.162109375, "step": 5420 }, { "epoch": 0.41237896335674956, "grad_norm": 1.5337530658181449, "learning_rate": 1.0856505886963116e-06, "log_odds_chosen": 1.398535132408142, "log_odds_ratio": -0.41552734375, "logits/chosen": -1.1355469226837158, "logits/rejected": -0.9457031488418579, "logps/chosen": -0.684863269329071, "logps/rejected": -1.6785156726837158, "loss": 0.9277, "nll_loss": 0.8758789300918579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06843261420726776, "rewards/margins": 0.09946899116039276, "rewards/rejected": -0.1678466796875, "step": 5430 }, { "epoch": 0.41313840896145815, "grad_norm": 1.7208933302917169, "learning_rate": 1.0846522890932808e-06, "log_odds_chosen": 1.344140648841858, "log_odds_ratio": -0.395751953125, "logits/chosen": -1.1162109375, "logits/rejected": -0.9498046636581421, "logps/chosen": -0.7367187738418579, "logps/rejected": -1.6896483898162842, "loss": 0.9127, "nll_loss": 0.863085925579071, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.0736083984375, "rewards/margins": 0.09532471001148224, "rewards/rejected": -0.16887207329273224, "step": 5440 }, { "epoch": 0.4138978545661667, "grad_norm": 1.5265850227835105, "learning_rate": 1.0836567383657542e-06, "log_odds_chosen": 1.474023461341858, "log_odds_ratio": -0.4117187559604645, "logits/chosen": -1.3152344226837158, "logits/rejected": -1.0978515148162842, "logps/chosen": -0.744433581829071, "logps/rejected": -1.832421898841858, "loss": 0.9278, "nll_loss": 0.936328113079071, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07445068657398224, "rewards/margins": 0.1087646484375, "rewards/rejected": -0.1832275390625, "step": 5450 }, { "epoch": 0.41465730017087526, "grad_norm": 1.5480663400420236, "learning_rate": 1.0826639239215334e-06, "log_odds_chosen": 1.486914038658142, "log_odds_ratio": -0.4352050721645355, "logits/chosen": -1.117578148841858, "logits/rejected": -1.0, "logps/chosen": -0.71484375, "logps/rejected": -1.810546875, "loss": 0.924, "nll_loss": 0.927929699420929, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07156982272863388, "rewards/margins": 0.10943603515625, "rewards/rejected": -0.18098144233226776, "step": 5460 }, { "epoch": 0.41541674577558385, "grad_norm": 1.8016219889754974, "learning_rate": 1.0816738332490292e-06, "log_odds_chosen": 1.3328125476837158, "log_odds_ratio": -0.44306641817092896, "logits/chosen": -1.241796851158142, "logits/rejected": -1.001367211341858, "logps/chosen": -0.7601562738418579, "logps/rejected": -1.750390648841858, "loss": 0.9392, "nll_loss": 0.865234375, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07602538913488388, "rewards/margins": 0.09902343899011612, "rewards/rejected": -0.17512206733226776, "step": 5470 }, { "epoch": 0.4161761913802924, "grad_norm": 1.4008358407888266, "learning_rate": 1.0806864539165982e-06, "log_odds_chosen": 1.4025390148162842, "log_odds_ratio": -0.4404296875, "logits/chosen": -1.035546898841858, "logits/rejected": -0.8968750238418579, "logps/chosen": -0.7451171875, "logps/rejected": -1.7980468273162842, "loss": 0.9404, "nll_loss": 0.942187488079071, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07448730617761612, "rewards/margins": 0.10543213039636612, "rewards/rejected": -0.17988280951976776, "step": 5480 }, { "epoch": 0.41693563698500097, "grad_norm": 1.6057708308658367, "learning_rate": 1.0797017735718878e-06, "log_odds_chosen": 1.442773461341858, "log_odds_ratio": -0.3764404356479645, "logits/chosen": -1.2062499523162842, "logits/rejected": -1.0457031726837158, "logps/chosen": -0.6548827886581421, "logps/rejected": -1.657617211341858, "loss": 0.9325, "nll_loss": 0.834179699420929, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06545410305261612, "rewards/margins": 0.10020141303539276, "rewards/rejected": -0.16567382216453552, "step": 5490 }, { "epoch": 0.4176950825897095, "grad_norm": 2.148170529620603, "learning_rate": 1.0787197799411874e-06, "log_odds_chosen": 1.383691430091858, "log_odds_ratio": -0.4491210877895355, "logits/chosen": -1.153906226158142, "logits/rejected": -1.031835913658142, "logps/chosen": -0.698535144329071, "logps/rejected": -1.682031273841858, "loss": 0.9187, "nll_loss": 0.8755859136581421, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06979980319738388, "rewards/margins": 0.09835205227136612, "rewards/rejected": -0.16816405951976776, "step": 5500 }, { "epoch": 0.4184545281944181, "grad_norm": 1.4485095099398015, "learning_rate": 1.0777404608287846e-06, "log_odds_chosen": 1.3551757335662842, "log_odds_ratio": -0.41362303495407104, "logits/chosen": -1.191015601158142, "logits/rejected": -0.997851550579071, "logps/chosen": -0.7217773199081421, "logps/rejected": -1.701562523841858, "loss": 0.9429, "nll_loss": 0.8912109136581421, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07215575873851776, "rewards/margins": 0.09809570014476776, "rewards/rejected": -0.17026367783546448, "step": 5510 }, { "epoch": 0.4192139737991266, "grad_norm": 1.3849530089893927, "learning_rate": 1.0767638041163309e-06, "log_odds_chosen": 1.220947265625, "log_odds_ratio": -0.4481445252895355, "logits/chosen": -1.2068359851837158, "logits/rejected": -1.0525391101837158, "logps/chosen": -0.671582043170929, "logps/rejected": -1.538671851158142, "loss": 0.9355, "nll_loss": 0.8836914300918579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06715087592601776, "rewards/margins": 0.08673705905675888, "rewards/rejected": -0.15377196669578552, "step": 5520 }, { "epoch": 0.4199734194038352, "grad_norm": 1.4389016410099646, "learning_rate": 1.0757897977622107e-06, "log_odds_chosen": 1.224609375, "log_odds_ratio": -0.435302734375, "logits/chosen": -1.154687523841858, "logits/rejected": -0.9921875, "logps/chosen": -0.7701171636581421, "logps/rejected": -1.605078101158142, "loss": 0.9347, "nll_loss": 0.8331054449081421, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07698974758386612, "rewards/margins": 0.08338622748851776, "rewards/rejected": -0.16042479872703552, "step": 5530 }, { "epoch": 0.4207328650085438, "grad_norm": 1.6419463719280891, "learning_rate": 1.074818429800918e-06, "log_odds_chosen": 1.3687012195587158, "log_odds_ratio": -0.4013671875, "logits/chosen": -1.2462890148162842, "logits/rejected": -1.052148461341858, "logps/chosen": -0.71484375, "logps/rejected": -1.6492187976837158, "loss": 0.9363, "nll_loss": 0.897265613079071, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07154540717601776, "rewards/margins": 0.09346923977136612, "rewards/rejected": -0.1649169921875, "step": 5540 }, { "epoch": 0.4214923106132523, "grad_norm": 1.6599204949716686, "learning_rate": 1.073849688342439e-06, "log_odds_chosen": 1.19873046875, "log_odds_ratio": -0.44697266817092896, "logits/chosen": -1.2365233898162842, "logits/rejected": -1.0439453125, "logps/chosen": -0.7403320074081421, "logps/rejected": -1.580468773841858, "loss": 0.9074, "nll_loss": 0.9052734375, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07401122897863388, "rewards/margins": 0.083984375, "rewards/rejected": -0.15805664658546448, "step": 5550 }, { "epoch": 0.4222517562179609, "grad_norm": 1.6557271253162857, "learning_rate": 1.07288356157164e-06, "log_odds_chosen": 1.423730492591858, "log_odds_ratio": -0.4172607362270355, "logits/chosen": -1.2052733898162842, "logits/rejected": -1.0623047351837158, "logps/chosen": -0.7109375, "logps/rejected": -1.794921875, "loss": 0.9255, "nll_loss": 0.927539050579071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07105712592601776, "rewards/margins": 0.10831908881664276, "rewards/rejected": -0.179443359375, "step": 5560 }, { "epoch": 0.42301120182266944, "grad_norm": 1.86639823425965, "learning_rate": 1.0719200377476648e-06, "log_odds_chosen": 1.4299805164337158, "log_odds_ratio": -0.45673829317092896, "logits/chosen": -1.088281273841858, "logits/rejected": -0.933398425579071, "logps/chosen": -0.696484386920929, "logps/rejected": -1.725000023841858, "loss": 0.9275, "nll_loss": 0.880078136920929, "rewards/accuracies": 0.75, "rewards/chosen": -0.06966552883386612, "rewards/margins": 0.10292510688304901, "rewards/rejected": -0.17258301377296448, "step": 5570 }, { "epoch": 0.423770647427378, "grad_norm": 1.854987851104599, "learning_rate": 1.0709591052033317e-06, "log_odds_chosen": 1.31884765625, "log_odds_ratio": -0.48002928495407104, "logits/chosen": -1.0847656726837158, "logits/rejected": -0.93359375, "logps/chosen": -0.7513672113418579, "logps/rejected": -1.729882836341858, "loss": 0.9461, "nll_loss": 0.950390636920929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07513427734375, "rewards/margins": 0.09794922173023224, "rewards/rejected": -0.17314453423023224, "step": 5580 }, { "epoch": 0.42453009303208655, "grad_norm": 1.5431464540209217, "learning_rate": 1.0700007523445435e-06, "log_odds_chosen": 1.4816405773162842, "log_odds_ratio": -0.41962891817092896, "logits/chosen": -1.2009766101837158, "logits/rejected": -1.030664086341858, "logps/chosen": -0.7236328125, "logps/rejected": -1.826562523841858, "loss": 0.9254, "nll_loss": 0.8617187738418579, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07236327975988388, "rewards/margins": 0.11017455905675888, "rewards/rejected": -0.18242187798023224, "step": 5590 }, { "epoch": 0.42528953863679514, "grad_norm": 1.4503673025334631, "learning_rate": 1.0690449676496976e-06, "log_odds_chosen": 1.30517578125, "log_odds_ratio": -0.42631834745407104, "logits/chosen": -1.171484351158142, "logits/rejected": -1.028906226158142, "logps/chosen": -0.639453113079071, "logps/rejected": -1.5625, "loss": 0.9163, "nll_loss": 0.883593738079071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06392822414636612, "rewards/margins": 0.09235839545726776, "rewards/rejected": -0.15627440810203552, "step": 5600 }, { "epoch": 0.4260489842415037, "grad_norm": 1.4875336167374713, "learning_rate": 1.0680917396691054e-06, "log_odds_chosen": 1.473046898841858, "log_odds_ratio": -0.3726562559604645, "logits/chosen": -1.1183593273162842, "logits/rejected": -0.9302734136581421, "logps/chosen": -0.6797851324081421, "logps/rejected": -1.697656273841858, "loss": 0.9077, "nll_loss": 0.8246093988418579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0679931640625, "rewards/margins": 0.10166015475988388, "rewards/rejected": -0.16962890326976776, "step": 5610 }, { "epoch": 0.42680842984621226, "grad_norm": 1.7001349693207548, "learning_rate": 1.0671410570244164e-06, "log_odds_chosen": 1.4954102039337158, "log_odds_ratio": -0.3932128846645355, "logits/chosen": -1.2263672351837158, "logits/rejected": -1.001562476158142, "logps/chosen": -0.635937511920929, "logps/rejected": -1.675390601158142, "loss": 0.9094, "nll_loss": 0.8080078363418579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.06363525241613388, "rewards/margins": 0.10396728664636612, "rewards/rejected": -0.16762694716453552, "step": 5620 }, { "epoch": 0.42756787545092084, "grad_norm": 1.5029985175944296, "learning_rate": 1.0661929084080466e-06, "log_odds_chosen": 1.3957030773162842, "log_odds_ratio": -0.42509764432907104, "logits/chosen": -1.195898413658142, "logits/rejected": -0.9908202886581421, "logps/chosen": -0.720703125, "logps/rejected": -1.7023437023162842, "loss": 0.9152, "nll_loss": 0.9251953363418579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07198486477136612, "rewards/margins": 0.09825439751148224, "rewards/rejected": -0.17019042372703552, "step": 5630 }, { "epoch": 0.4283273210556294, "grad_norm": 1.421811068082732, "learning_rate": 1.0652472825826149e-06, "log_odds_chosen": 1.45703125, "log_odds_ratio": -0.40507811307907104, "logits/chosen": -1.1326172351837158, "logits/rejected": -0.9527343511581421, "logps/chosen": -0.7408202886581421, "logps/rejected": -1.808203101158142, "loss": 0.9218, "nll_loss": 0.929492175579071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07403564453125, "rewards/margins": 0.10683593899011612, "rewards/rejected": -0.18081054091453552, "step": 5640 }, { "epoch": 0.42908676666033796, "grad_norm": 1.5228126575696839, "learning_rate": 1.0643041683803828e-06, "log_odds_chosen": 1.305566430091858, "log_odds_ratio": -0.46687012910842896, "logits/chosen": -1.134374976158142, "logits/rejected": -0.9769531488418579, "logps/chosen": -0.768359363079071, "logps/rejected": -1.748046875, "loss": 0.9243, "nll_loss": 0.9432617425918579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07685546576976776, "rewards/margins": 0.09790954738855362, "rewards/rejected": -0.17475585639476776, "step": 5650 }, { "epoch": 0.4298462122650465, "grad_norm": 2.2072882884803753, "learning_rate": 1.063363554702701e-06, "log_odds_chosen": 1.3776366710662842, "log_odds_ratio": -0.43388670682907104, "logits/chosen": -1.1921875476837158, "logits/rejected": -1.0070312023162842, "logps/chosen": -0.7064453363418579, "logps/rejected": -1.705468773841858, "loss": 0.9282, "nll_loss": 0.8958984613418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07062987983226776, "rewards/margins": 0.09985657036304474, "rewards/rejected": -0.17048339545726776, "step": 5660 }, { "epoch": 0.4306056578697551, "grad_norm": 1.7924191761192407, "learning_rate": 1.0624254305194609e-06, "log_odds_chosen": 1.2024414539337158, "log_odds_ratio": -0.43842774629592896, "logits/chosen": -1.1560547351837158, "logits/rejected": -0.9921875, "logps/chosen": -0.75, "logps/rejected": -1.615625023841858, "loss": 0.9209, "nll_loss": 0.859375, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07498779147863388, "rewards/margins": 0.08665160834789276, "rewards/rejected": -0.1617431640625, "step": 5670 }, { "epoch": 0.43136510347446366, "grad_norm": 1.4895573917094431, "learning_rate": 1.0614897848685505e-06, "log_odds_chosen": 1.4484374523162842, "log_odds_ratio": -0.4004882872104645, "logits/chosen": -1.145898461341858, "logits/rejected": -1.0564453601837158, "logps/chosen": -0.6607421636581421, "logps/rejected": -1.6476562023162842, "loss": 0.9357, "nll_loss": 0.87890625, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06610107421875, "rewards/margins": 0.09855957329273224, "rewards/rejected": -0.16474609076976776, "step": 5680 }, { "epoch": 0.4321245490791722, "grad_norm": 1.6173230574930584, "learning_rate": 1.0605566068553173e-06, "log_odds_chosen": 1.45458984375, "log_odds_ratio": -0.40864259004592896, "logits/chosen": -1.2117187976837158, "logits/rejected": -1.027929663658142, "logps/chosen": -0.71484375, "logps/rejected": -1.768945336341858, "loss": 0.9202, "nll_loss": 0.870898425579071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07148437201976776, "rewards/margins": 0.10549011081457138, "rewards/rejected": -0.177001953125, "step": 5690 }, { "epoch": 0.4328839946838808, "grad_norm": 1.5765306223228364, "learning_rate": 1.0596258856520351e-06, "log_odds_chosen": 1.446679711341858, "log_odds_ratio": -0.412841796875, "logits/chosen": -1.138085961341858, "logits/rejected": -0.9658203125, "logps/chosen": -0.683300793170929, "logps/rejected": -1.7197265625, "loss": 0.9246, "nll_loss": 0.8466796875, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06829833984375, "rewards/margins": 0.10364989936351776, "rewards/rejected": -0.17182616889476776, "step": 5700 }, { "epoch": 0.4336434402885893, "grad_norm": 1.4421153691041861, "learning_rate": 1.0586976104973764e-06, "log_odds_chosen": 1.262182593345642, "log_odds_ratio": -0.4761718809604645, "logits/chosen": -1.2345702648162842, "logits/rejected": -1.056249976158142, "logps/chosen": -0.732226550579071, "logps/rejected": -1.619726538658142, "loss": 0.9236, "nll_loss": 0.9029296636581421, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07322998344898224, "rewards/margins": 0.08872680366039276, "rewards/rejected": -0.16201171278953552, "step": 5710 }, { "epoch": 0.4344028858932979, "grad_norm": 1.9141611683744637, "learning_rate": 1.05777177069589e-06, "log_odds_chosen": 1.521093726158142, "log_odds_ratio": -0.3741210997104645, "logits/chosen": -1.203515648841858, "logits/rejected": -1.0234375, "logps/chosen": -0.6483398675918579, "logps/rejected": -1.7101562023162842, "loss": 0.9168, "nll_loss": 0.8285156488418579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06483153998851776, "rewards/margins": 0.10618896782398224, "rewards/rejected": -0.17106933891773224, "step": 5720 }, { "epoch": 0.4351623314980065, "grad_norm": 1.598423657087501, "learning_rate": 1.0568483556174834e-06, "log_odds_chosen": 1.5594482421875, "log_odds_ratio": -0.35517579317092896, "logits/chosen": -1.2257812023162842, "logits/rejected": -0.96240234375, "logps/chosen": -0.703125, "logps/rejected": -1.8361327648162842, "loss": 0.922, "nll_loss": 0.984179675579071, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.07041015475988388, "rewards/margins": 0.11327819526195526, "rewards/rejected": -0.18364258110523224, "step": 5730 }, { "epoch": 0.435921777102715, "grad_norm": 1.7019677264452258, "learning_rate": 1.055927354696909e-06, "log_odds_chosen": 1.6296875476837158, "log_odds_ratio": -0.3736816346645355, "logits/chosen": -1.1828124523162842, "logits/rejected": -0.9931640625, "logps/chosen": -0.6880859136581421, "logps/rejected": -1.911718726158142, "loss": 0.9452, "nll_loss": 0.9248046875, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06876220554113388, "rewards/margins": 0.12225341796875, "rewards/rejected": -0.19108887016773224, "step": 5740 }, { "epoch": 0.4366812227074236, "grad_norm": 1.9156149530671471, "learning_rate": 1.0550087574332592e-06, "log_odds_chosen": 1.327734351158142, "log_odds_ratio": -0.3839355409145355, "logits/chosen": -1.1318359375, "logits/rejected": -0.947070300579071, "logps/chosen": -0.693164050579071, "logps/rejected": -1.570898413658142, "loss": 0.9169, "nll_loss": 0.834765613079071, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06925048679113388, "rewards/margins": 0.08771972358226776, "rewards/rejected": -0.156982421875, "step": 5750 }, { "epoch": 0.43744066831213213, "grad_norm": 2.1233394082731087, "learning_rate": 1.0540925533894598e-06, "log_odds_chosen": 1.190527319908142, "log_odds_ratio": -0.421142578125, "logits/chosen": -1.1748046875, "logits/rejected": -1.0197265148162842, "logps/chosen": -0.653027355670929, "logps/rejected": -1.4494140148162842, "loss": 0.9448, "nll_loss": 0.8692382574081421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06529541313648224, "rewards/margins": 0.07956542819738388, "rewards/rejected": -0.14492186903953552, "step": 5760 }, { "epoch": 0.4382001139168407, "grad_norm": 1.5792744160534835, "learning_rate": 1.053178732191775e-06, "log_odds_chosen": 1.4021728038787842, "log_odds_ratio": -0.38251954317092896, "logits/chosen": -1.293554663658142, "logits/rejected": -1.03125, "logps/chosen": -0.6670898199081421, "logps/rejected": -1.656640648841858, "loss": 0.9116, "nll_loss": 0.7939453125, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06672362983226776, "rewards/margins": 0.09897766262292862, "rewards/rejected": -0.1656494140625, "step": 5770 }, { "epoch": 0.43895955952154925, "grad_norm": 1.6278886331805098, "learning_rate": 1.0522672835293127e-06, "log_odds_chosen": 1.1677734851837158, "log_odds_ratio": -0.4286132752895355, "logits/chosen": -1.218359351158142, "logits/rejected": -1.0466797351837158, "logps/chosen": -0.7027343511581421, "logps/rejected": -1.5304687023162842, "loss": 0.94, "nll_loss": 0.866406261920929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07028808444738388, "rewards/margins": 0.082672119140625, "rewards/rejected": -0.15297850966453552, "step": 5780 }, { "epoch": 0.43971900512625783, "grad_norm": 1.5964556130554322, "learning_rate": 1.0513581971535365e-06, "log_odds_chosen": 1.251562476158142, "log_odds_ratio": -0.43994140625, "logits/chosen": -1.180273413658142, "logits/rejected": -0.988476574420929, "logps/chosen": -0.779101550579071, "logps/rejected": -1.6925780773162842, "loss": 0.9365, "nll_loss": 0.930859386920929, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07781982421875, "rewards/margins": 0.09141846001148224, "rewards/rejected": -0.16923828423023224, "step": 5790 }, { "epoch": 0.4404784507309664, "grad_norm": 1.8380361502104237, "learning_rate": 1.0504514628777803e-06, "log_odds_chosen": 1.508154273033142, "log_odds_ratio": -0.389892578125, "logits/chosen": -1.185937523841858, "logits/rejected": -1.0294921398162842, "logps/chosen": -0.7383788824081421, "logps/rejected": -1.8224608898162842, "loss": 0.9069, "nll_loss": 0.880859375, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07387695461511612, "rewards/margins": 0.10839996486902237, "rewards/rejected": -0.18215331435203552, "step": 5800 }, { "epoch": 0.44123789633567495, "grad_norm": 1.7552640142033205, "learning_rate": 1.0495470705767713e-06, "log_odds_chosen": 1.4384765625, "log_odds_ratio": -0.400634765625, "logits/chosen": -1.105078101158142, "logits/rejected": -0.974414050579071, "logps/chosen": -0.67041015625, "logps/rejected": -1.6701171398162842, "loss": 0.9139, "nll_loss": 0.8236328363418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06700439751148224, "rewards/margins": 0.099853515625, "rewards/rejected": -0.16689452528953552, "step": 5810 }, { "epoch": 0.44199734194038354, "grad_norm": 1.9722677639146122, "learning_rate": 1.0486450101861527e-06, "log_odds_chosen": 1.4155762195587158, "log_odds_ratio": -0.4532226622104645, "logits/chosen": -1.112695336341858, "logits/rejected": -0.9189453125, "logps/chosen": -0.7724609375, "logps/rejected": -1.805078148841858, "loss": 0.9128, "nll_loss": 0.865039050579071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07720947265625, "rewards/margins": 0.10319671779870987, "rewards/rejected": -0.18044432997703552, "step": 5820 }, { "epoch": 0.44275678754509207, "grad_norm": 1.5624227028552717, "learning_rate": 1.0477452717020143e-06, "log_odds_chosen": 1.5167968273162842, "log_odds_ratio": -0.37202149629592896, "logits/chosen": -1.15625, "logits/rejected": -0.966796875, "logps/chosen": -0.725390613079071, "logps/rejected": -1.8351562023162842, "loss": 0.9092, "nll_loss": 0.923046886920929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07246093451976776, "rewards/margins": 0.11101074516773224, "rewards/rejected": -0.18339844048023224, "step": 5830 }, { "epoch": 0.44351623314980065, "grad_norm": 2.0253513180385463, "learning_rate": 1.0468478451804272e-06, "log_odds_chosen": 1.4273192882537842, "log_odds_ratio": -0.4220214784145355, "logits/chosen": -1.1611328125, "logits/rejected": -1.0205078125, "logps/chosen": -0.6998046636581421, "logps/rejected": -1.705078125, "loss": 0.9091, "nll_loss": 0.859570324420929, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06997070461511612, "rewards/margins": 0.10060729831457138, "rewards/rejected": -0.17058105766773224, "step": 5840 }, { "epoch": 0.4442756787545092, "grad_norm": 1.45036823820643, "learning_rate": 1.0459527207369814e-06, "log_odds_chosen": 1.42822265625, "log_odds_ratio": -0.4052734375, "logits/chosen": -1.241601586341858, "logits/rejected": -0.988476574420929, "logps/chosen": -0.6885741949081421, "logps/rejected": -1.703710913658142, "loss": 0.9002, "nll_loss": 0.911914050579071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06888427585363388, "rewards/margins": 0.10152740776538849, "rewards/rejected": -0.17045898735523224, "step": 5850 }, { "epoch": 0.44503512435921777, "grad_norm": 1.5576874999799164, "learning_rate": 1.0450598885463281e-06, "log_odds_chosen": 1.622656226158142, "log_odds_ratio": -0.3409179747104645, "logits/chosen": -1.240234375, "logits/rejected": -1.0009765625, "logps/chosen": -0.7105468511581421, "logps/rejected": -1.8664062023162842, "loss": 0.9323, "nll_loss": 0.904492199420929, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.07102050632238388, "rewards/margins": 0.11552734673023224, "rewards/rejected": -0.18662109971046448, "step": 5860 }, { "epoch": 0.44579456996392636, "grad_norm": 1.7735189800499571, "learning_rate": 1.0441693388417282e-06, "log_odds_chosen": 1.548925757408142, "log_odds_ratio": -0.430908203125, "logits/chosen": -1.168554663658142, "logits/rejected": -1.0167968273162842, "logps/chosen": -0.7152343988418579, "logps/rejected": -1.879296898841858, "loss": 0.9373, "nll_loss": 0.855664074420929, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07156982272863388, "rewards/margins": 0.11650695651769638, "rewards/rejected": -0.18815918266773224, "step": 5870 }, { "epoch": 0.4465540155686349, "grad_norm": 1.524313612587998, "learning_rate": 1.0432810619146023e-06, "log_odds_chosen": 1.399316430091858, "log_odds_ratio": -0.4214843809604645, "logits/chosen": -1.1257812976837158, "logits/rejected": -0.9593750238418579, "logps/chosen": -0.747265636920929, "logps/rejected": -1.787500023841858, "loss": 0.9293, "nll_loss": 0.9423828125, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07469482719898224, "rewards/margins": 0.10396728664636612, "rewards/rejected": -0.17878417670726776, "step": 5880 }, { "epoch": 0.4473134611733435, "grad_norm": 1.6587441073040852, "learning_rate": 1.042395048114086e-06, "log_odds_chosen": 1.329077124595642, "log_odds_ratio": -0.43427735567092896, "logits/chosen": -1.153906226158142, "logits/rejected": -0.9837890863418579, "logps/chosen": -0.6898437738418579, "logps/rejected": -1.6554687023162842, "loss": 0.9133, "nll_loss": 0.8697265386581421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06898193061351776, "rewards/margins": 0.0965576171875, "rewards/rejected": -0.16552734375, "step": 5890 }, { "epoch": 0.448072906778052, "grad_norm": 2.279474869098519, "learning_rate": 1.041511287846591e-06, "log_odds_chosen": 1.150048851966858, "log_odds_ratio": -0.45048826932907104, "logits/chosen": -1.296289086341858, "logits/rejected": -1.0984375476837158, "logps/chosen": -0.7442382574081421, "logps/rejected": -1.535742163658142, "loss": 0.9126, "nll_loss": 0.82421875, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07443847507238388, "rewards/margins": 0.07917938381433487, "rewards/rejected": -0.15373535454273224, "step": 5900 }, { "epoch": 0.4488323523827606, "grad_norm": 1.4815545850170078, "learning_rate": 1.0406297715753674e-06, "log_odds_chosen": 1.423437476158142, "log_odds_ratio": -0.40234375, "logits/chosen": -1.1853516101837158, "logits/rejected": -1.0154297351837158, "logps/chosen": -0.772656261920929, "logps/rejected": -1.8068358898162842, "loss": 0.9001, "nll_loss": 0.919726550579071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07729492336511612, "rewards/margins": 0.10333251953125, "rewards/rejected": -0.18056640028953552, "step": 5910 }, { "epoch": 0.4495917979874691, "grad_norm": 1.4738486337236572, "learning_rate": 1.0397504898200726e-06, "log_odds_chosen": 1.4245116710662842, "log_odds_ratio": -0.4004150331020355, "logits/chosen": -1.1535155773162842, "logits/rejected": -0.956250011920929, "logps/chosen": -0.679394543170929, "logps/rejected": -1.668359398841858, "loss": 0.9089, "nll_loss": 0.9229491949081421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06791992485523224, "rewards/margins": 0.098876953125, "rewards/rejected": -0.16672363877296448, "step": 5920 }, { "epoch": 0.4503512435921777, "grad_norm": 1.9068259702195556, "learning_rate": 1.0388734331563415e-06, "log_odds_chosen": 1.24755859375, "log_odds_ratio": -0.47968751192092896, "logits/chosen": -1.1876952648162842, "logits/rejected": -1.001367211341858, "logps/chosen": -0.763867199420929, "logps/rejected": -1.6628906726837158, "loss": 0.9356, "nll_loss": 0.922656238079071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07636718451976776, "rewards/margins": 0.08985748142004013, "rewards/rejected": -0.16635742783546448, "step": 5930 }, { "epoch": 0.4511106891968863, "grad_norm": 1.9209387097801713, "learning_rate": 1.037998592215364e-06, "log_odds_chosen": 1.37744140625, "log_odds_ratio": -0.399169921875, "logits/chosen": -1.2111327648162842, "logits/rejected": -1.000585913658142, "logps/chosen": -0.7046874761581421, "logps/rejected": -1.6496093273162842, "loss": 0.9095, "nll_loss": 0.8441406488418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07042236626148224, "rewards/margins": 0.09445800632238388, "rewards/rejected": -0.16489258408546448, "step": 5940 }, { "epoch": 0.4518701348015948, "grad_norm": 1.526865906841316, "learning_rate": 1.037125957683463e-06, "log_odds_chosen": 1.6033203601837158, "log_odds_ratio": -0.3919433653354645, "logits/chosen": -1.1017577648162842, "logits/rejected": -0.8896484375, "logps/chosen": -0.645703136920929, "logps/rejected": -1.831640601158142, "loss": 0.9445, "nll_loss": 0.952929675579071, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06461181491613388, "rewards/margins": 0.1185302734375, "rewards/rejected": -0.18293456733226776, "step": 5950 }, { "epoch": 0.4526295804063034, "grad_norm": 1.6373030163469664, "learning_rate": 1.0362555203016794e-06, "log_odds_chosen": 1.292382836341858, "log_odds_ratio": -0.4423828125, "logits/chosen": -1.156640648841858, "logits/rejected": -0.965624988079071, "logps/chosen": -0.695605456829071, "logps/rejected": -1.634765625, "loss": 0.92, "nll_loss": 0.7861328125, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.06949462741613388, "rewards/margins": 0.09393920749425888, "rewards/rejected": -0.16350097954273224, "step": 5960 }, { "epoch": 0.45338902601101194, "grad_norm": 1.6631206507946543, "learning_rate": 1.035387270865359e-06, "log_odds_chosen": 1.6338379383087158, "log_odds_ratio": -0.3847900331020355, "logits/chosen": -1.1613280773162842, "logits/rejected": -1.0177733898162842, "logps/chosen": -0.66162109375, "logps/rejected": -1.818750023841858, "loss": 0.9046, "nll_loss": 0.821093738079071, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06614990532398224, "rewards/margins": 0.1156463623046875, "rewards/rejected": -0.1820068359375, "step": 5970 }, { "epoch": 0.45414847161572053, "grad_norm": 1.5732144186637915, "learning_rate": 1.0345212002237434e-06, "log_odds_chosen": 1.5517578125, "log_odds_ratio": -0.3792968690395355, "logits/chosen": -1.092382788658142, "logits/rejected": -0.931640625, "logps/chosen": -0.6375976800918579, "logps/rejected": -1.6980469226837158, "loss": 0.9122, "nll_loss": 0.839062511920929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06376953423023224, "rewards/margins": 0.10615234076976776, "rewards/rejected": -0.1700439453125, "step": 5980 }, { "epoch": 0.4549079172204291, "grad_norm": 2.028659073020803, "learning_rate": 1.0336572992795644e-06, "log_odds_chosen": 1.325781226158142, "log_odds_ratio": -0.42414551973342896, "logits/chosen": -1.0886719226837158, "logits/rejected": -0.9615234136581421, "logps/chosen": -0.7464843988418579, "logps/rejected": -1.704687476158142, "loss": 0.9166, "nll_loss": 0.8863281011581421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07464599609375, "rewards/margins": 0.09577636420726776, "rewards/rejected": -0.17041015625, "step": 5990 }, { "epoch": 0.45566736282513765, "grad_norm": 1.4435181996025415, "learning_rate": 1.0327955589886444e-06, "log_odds_chosen": 1.478124976158142, "log_odds_ratio": -0.3985839784145355, "logits/chosen": -1.2628905773162842, "logits/rejected": -1.022851586341858, "logps/chosen": -0.7251952886581421, "logps/rejected": -1.779687523841858, "loss": 0.9228, "nll_loss": 0.8042968511581421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07248535007238388, "rewards/margins": 0.10560302436351776, "rewards/rejected": -0.17807617783546448, "step": 6000 }, { "epoch": 0.45642680842984623, "grad_norm": 1.6565613616158643, "learning_rate": 1.0319359703594971e-06, "log_odds_chosen": 1.372460961341858, "log_odds_ratio": -0.3787597715854645, "logits/chosen": -1.2218749523162842, "logits/rejected": -1.0048828125, "logps/chosen": -0.7119140625, "logps/rejected": -1.684179663658142, "loss": 0.9403, "nll_loss": 0.877148449420929, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07115478813648224, "rewards/margins": 0.09728393703699112, "rewards/rejected": -0.16840820014476776, "step": 6010 }, { "epoch": 0.45718625403455476, "grad_norm": 1.970436945496721, "learning_rate": 1.0310785244529341e-06, "log_odds_chosen": 1.3662109375, "log_odds_ratio": -0.3836425840854645, "logits/chosen": -1.184179663658142, "logits/rejected": -1.0441405773162842, "logps/chosen": -0.6693359613418579, "logps/rejected": -1.5984375476837158, "loss": 0.9282, "nll_loss": 0.8480468988418579, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06695556640625, "rewards/margins": 0.09279785305261612, "rewards/rejected": -0.15981444716453552, "step": 6020 }, { "epoch": 0.45794569963926335, "grad_norm": 18.887963316076195, "learning_rate": 1.0302232123816746e-06, "log_odds_chosen": 1.43310546875, "log_odds_ratio": -0.40922850370407104, "logits/chosen": -1.1787109375, "logits/rejected": -0.999218761920929, "logps/chosen": -0.6976562738418579, "logps/rejected": -1.7511718273162842, "loss": 0.9371, "nll_loss": 0.8666015863418579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.06972656399011612, "rewards/margins": 0.10526122897863388, "rewards/rejected": -0.17502442002296448, "step": 6030 }, { "epoch": 0.4587051452439719, "grad_norm": 1.4862653171083733, "learning_rate": 1.0293700253099576e-06, "log_odds_chosen": 1.623046875, "log_odds_ratio": -0.3536621034145355, "logits/chosen": -1.2667968273162842, "logits/rejected": -1.073828101158142, "logps/chosen": -0.6805664300918579, "logps/rejected": -1.841796875, "loss": 0.935, "nll_loss": 0.8750976324081421, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06801757961511612, "rewards/margins": 0.11599121242761612, "rewards/rejected": -0.18425293266773224, "step": 6040 }, { "epoch": 0.45946459084868047, "grad_norm": 1.967551220977189, "learning_rate": 1.02851895445316e-06, "log_odds_chosen": 1.07421875, "log_odds_ratio": -0.46435546875, "logits/chosen": -1.1124999523162842, "logits/rejected": -0.981640636920929, "logps/chosen": -0.698437511920929, "logps/rejected": -1.4373047351837158, "loss": 0.9526, "nll_loss": 0.9251953363418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06984863430261612, "rewards/margins": 0.073883056640625, "rewards/rejected": -0.1436767578125, "step": 6050 }, { "epoch": 0.46022403645338905, "grad_norm": 1.7968069665038986, "learning_rate": 1.0276699910774159e-06, "log_odds_chosen": 1.3098633289337158, "log_odds_ratio": -0.41362303495407104, "logits/chosen": -1.2726562023162842, "logits/rejected": -1.012109398841858, "logps/chosen": -0.730664074420929, "logps/rejected": -1.657812476158142, "loss": 0.9068, "nll_loss": 0.881054699420929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07302246242761612, "rewards/margins": 0.092864990234375, "rewards/rejected": -0.16579589247703552, "step": 6060 }, { "epoch": 0.4609834820580976, "grad_norm": 1.4587242084061058, "learning_rate": 1.0268231264992398e-06, "log_odds_chosen": 1.5499999523162842, "log_odds_ratio": -0.36870115995407104, "logits/chosen": -1.290624976158142, "logits/rejected": -1.047460913658142, "logps/chosen": -0.68994140625, "logps/rejected": -1.749609351158142, "loss": 0.9164, "nll_loss": 0.8285156488418579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06899414211511612, "rewards/margins": 0.10596923530101776, "rewards/rejected": -0.17497558891773224, "step": 6070 }, { "epoch": 0.46174292766280617, "grad_norm": 2.3006706435100934, "learning_rate": 1.0259783520851542e-06, "log_odds_chosen": 1.305566430091858, "log_odds_ratio": -0.44074708223342896, "logits/chosen": -1.1589844226837158, "logits/rejected": -1.017187476158142, "logps/chosen": -0.7279297113418579, "logps/rejected": -1.6630859375, "loss": 0.9201, "nll_loss": 0.8873046636581421, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07286377251148224, "rewards/margins": 0.09345702826976776, "rewards/rejected": -0.1663818359375, "step": 6080 }, { "epoch": 0.4625023732675147, "grad_norm": 1.9692232462766712, "learning_rate": 1.0251356592513193e-06, "log_odds_chosen": 1.5852539539337158, "log_odds_ratio": -0.3880859315395355, "logits/chosen": -1.1326172351837158, "logits/rejected": -0.977343738079071, "logps/chosen": -0.667675793170929, "logps/rejected": -1.8271484375, "loss": 0.9364, "nll_loss": 0.861523449420929, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06676025688648224, "rewards/margins": 0.11607055366039276, "rewards/rejected": -0.1827392578125, "step": 6090 }, { "epoch": 0.4632618188722233, "grad_norm": 1.8593943241776336, "learning_rate": 1.0242950394631678e-06, "log_odds_chosen": 1.527246117591858, "log_odds_ratio": -0.3741699159145355, "logits/chosen": -1.156640648841858, "logits/rejected": -0.940625011920929, "logps/chosen": -0.6689453125, "logps/rejected": -1.7453124523162842, "loss": 0.907, "nll_loss": 0.845410168170929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06688232719898224, "rewards/margins": 0.1075439453125, "rewards/rejected": -0.17453613877296448, "step": 6100 }, { "epoch": 0.4640212644769318, "grad_norm": 1.5540332099875662, "learning_rate": 1.0234564842350404e-06, "log_odds_chosen": 1.2805664539337158, "log_odds_ratio": -0.446044921875, "logits/chosen": -1.1867187023162842, "logits/rejected": -0.979687511920929, "logps/chosen": -0.708203136920929, "logps/rejected": -1.615234375, "loss": 0.9069, "nll_loss": 0.8490234613418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07080078125, "rewards/margins": 0.09091796725988388, "rewards/rejected": -0.16169433295726776, "step": 6110 }, { "epoch": 0.4647807100816404, "grad_norm": 1.4963311343694126, "learning_rate": 1.0226199851298272e-06, "log_odds_chosen": 1.354101538658142, "log_odds_ratio": -0.38837891817092896, "logits/chosen": -1.2404296398162842, "logits/rejected": -1.0048828125, "logps/chosen": -0.713671863079071, "logps/rejected": -1.666406273841858, "loss": 0.9082, "nll_loss": 0.8333984613418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07136230170726776, "rewards/margins": 0.09516601264476776, "rewards/rejected": -0.1666259765625, "step": 6120 }, { "epoch": 0.465540155686349, "grad_norm": 1.9906300823832301, "learning_rate": 1.0217855337586106e-06, "log_odds_chosen": 1.34228515625, "log_odds_ratio": -0.41508787870407104, "logits/chosen": -1.224609375, "logits/rejected": -1.0207030773162842, "logps/chosen": -0.7201172113418579, "logps/rejected": -1.7039062976837158, "loss": 0.9062, "nll_loss": 0.8031250238418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07199706882238388, "rewards/margins": 0.09837646782398224, "rewards/rejected": -0.17036132514476776, "step": 6130 }, { "epoch": 0.4662996012910575, "grad_norm": 1.7178706762265161, "learning_rate": 1.0209531217803119e-06, "log_odds_chosen": 1.4267578125, "log_odds_ratio": -0.42534178495407104, "logits/chosen": -1.183984398841858, "logits/rejected": -0.986328125, "logps/chosen": -0.7447265386581421, "logps/rejected": -1.777734398841858, "loss": 0.9144, "nll_loss": 0.880078136920929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07452392578125, "rewards/margins": 0.103179931640625, "rewards/rejected": -0.177734375, "step": 6140 }, { "epoch": 0.4670590468957661, "grad_norm": 1.6727577907214386, "learning_rate": 1.0201227409013412e-06, "log_odds_chosen": 0.9747070074081421, "log_odds_ratio": -0.55322265625, "logits/chosen": -1.099218726158142, "logits/rejected": -0.963085949420929, "logps/chosen": -0.794921875, "logps/rejected": -1.5144531726837158, "loss": 0.9085, "nll_loss": 0.902050793170929, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.0794677734375, "rewards/margins": 0.07191161811351776, "rewards/rejected": -0.15144042670726776, "step": 6150 }, { "epoch": 0.46781849250047464, "grad_norm": 1.3815680301069175, "learning_rate": 1.0192943828752509e-06, "log_odds_chosen": 1.0742676258087158, "log_odds_ratio": -0.5059570074081421, "logits/chosen": -1.124414086341858, "logits/rejected": -0.960156261920929, "logps/chosen": -0.731738269329071, "logps/rejected": -1.473242163658142, "loss": 0.9024, "nll_loss": 0.9189453125, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.07313232123851776, "rewards/margins": 0.0740203857421875, "rewards/rejected": -0.14726562798023224, "step": 6160 }, { "epoch": 0.4685779381051832, "grad_norm": 1.8287055221581, "learning_rate": 1.0184680395023912e-06, "log_odds_chosen": 1.548828125, "log_odds_ratio": -0.37006837129592896, "logits/chosen": -1.2003905773162842, "logits/rejected": -0.993359386920929, "logps/chosen": -0.694140613079071, "logps/rejected": -1.8351562023162842, "loss": 0.9033, "nll_loss": 0.88671875, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06942138820886612, "rewards/margins": 0.11405029147863388, "rewards/rejected": -0.18339844048023224, "step": 6170 }, { "epoch": 0.46933738370989175, "grad_norm": 1.7088255389826874, "learning_rate": 1.0176437026295688e-06, "log_odds_chosen": 1.390722632408142, "log_odds_ratio": -0.40717774629592896, "logits/chosen": -1.1486327648162842, "logits/rejected": -0.955859363079071, "logps/chosen": -0.709765613079071, "logps/rejected": -1.714257836341858, "loss": 0.9075, "nll_loss": 0.8798828125, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07097168266773224, "rewards/margins": 0.10056152194738388, "rewards/rejected": -0.17148438096046448, "step": 6180 }, { "epoch": 0.47009682931460034, "grad_norm": 1.6230275287821498, "learning_rate": 1.0168213641497094e-06, "log_odds_chosen": 1.1037108898162842, "log_odds_ratio": -0.4568847715854645, "logits/chosen": -1.1962890625, "logits/rejected": -1.025390625, "logps/chosen": -0.7186523675918579, "logps/rejected": -1.4921875, "loss": 0.889, "nll_loss": 0.8052734136581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07180175930261612, "rewards/margins": 0.07738037407398224, "rewards/rejected": -0.14931640028953552, "step": 6190 }, { "epoch": 0.4708562749193089, "grad_norm": 1.668042602843461, "learning_rate": 1.016001016001524e-06, "log_odds_chosen": 1.2456543445587158, "log_odds_ratio": -0.43950194120407104, "logits/chosen": -1.263085961341858, "logits/rejected": -1.079687476158142, "logps/chosen": -0.7378906011581421, "logps/rejected": -1.665624976158142, "loss": 0.8817, "nll_loss": 0.8216797113418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07379150390625, "rewards/margins": 0.09264221042394638, "rewards/rejected": -0.16647949814796448, "step": 6200 }, { "epoch": 0.47161572052401746, "grad_norm": 2.08615189846624, "learning_rate": 1.0151826501691747e-06, "log_odds_chosen": 1.4113280773162842, "log_odds_ratio": -0.40302735567092896, "logits/chosen": -1.2121093273162842, "logits/rejected": -1.049218773841858, "logps/chosen": -0.7020508050918579, "logps/rejected": -1.7292969226837158, "loss": 0.9232, "nll_loss": 0.9541015625, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07020263373851776, "rewards/margins": 0.10270996391773224, "rewards/rejected": -0.1728515625, "step": 6210 }, { "epoch": 0.47237516612872604, "grad_norm": 1.64377883555205, "learning_rate": 1.0143662586819475e-06, "log_odds_chosen": 1.22637939453125, "log_odds_ratio": -0.42412108182907104, "logits/chosen": -1.120507836341858, "logits/rejected": -0.977734386920929, "logps/chosen": -0.739062488079071, "logps/rejected": -1.619531273841858, "loss": 0.9069, "nll_loss": 0.934374988079071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07395019382238388, "rewards/margins": 0.08812103420495987, "rewards/rejected": -0.16215820610523224, "step": 6220 }, { "epoch": 0.4731346117334346, "grad_norm": 1.6076070426711484, "learning_rate": 1.0135518336139257e-06, "log_odds_chosen": 1.46826171875, "log_odds_ratio": -0.39995115995407104, "logits/chosen": -1.1806640625, "logits/rejected": -0.960742175579071, "logps/chosen": -0.7528320550918579, "logps/rejected": -1.8175780773162842, "loss": 0.9155, "nll_loss": 0.826953113079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07529296725988388, "rewards/margins": 0.10648193210363388, "rewards/rejected": -0.18190917372703552, "step": 6230 }, { "epoch": 0.47389405733814316, "grad_norm": 1.751560146571624, "learning_rate": 1.0127393670836666e-06, "log_odds_chosen": 1.352636694908142, "log_odds_ratio": -0.4173828065395355, "logits/chosen": -1.2039062976837158, "logits/rejected": -1.037109375, "logps/chosen": -0.76708984375, "logps/rejected": -1.714453101158142, "loss": 0.934, "nll_loss": 0.874707043170929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07672119140625, "rewards/margins": 0.094635009765625, "rewards/rejected": -0.17133788764476776, "step": 6240 }, { "epoch": 0.4746535029428517, "grad_norm": 1.6936704413918289, "learning_rate": 1.0119288512538813e-06, "log_odds_chosen": 1.387841820716858, "log_odds_ratio": -0.4012695252895355, "logits/chosen": -1.299414038658142, "logits/rejected": -1.0947265625, "logps/chosen": -0.657519519329071, "logps/rejected": -1.603906273841858, "loss": 0.8756, "nll_loss": 0.785449206829071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06573486328125, "rewards/margins": 0.09447326511144638, "rewards/rejected": -0.16030272841453552, "step": 6250 }, { "epoch": 0.4754129485475603, "grad_norm": 1.6012256255386623, "learning_rate": 1.0111202783311173e-06, "log_odds_chosen": 1.0867187976837158, "log_odds_ratio": -0.48164063692092896, "logits/chosen": -1.178320288658142, "logits/rejected": -1.051367163658142, "logps/chosen": -0.6988281011581421, "logps/rejected": -1.4406249523162842, "loss": 0.9105, "nll_loss": 0.822070300579071, "rewards/accuracies": 0.75, "rewards/chosen": -0.06989745795726776, "rewards/margins": 0.07430420070886612, "rewards/rejected": -0.14433594048023224, "step": 6260 }, { "epoch": 0.47617239415226886, "grad_norm": 1.7877192729462237, "learning_rate": 1.010313640565443e-06, "log_odds_chosen": 1.3838989734649658, "log_odds_ratio": -0.44267576932907104, "logits/chosen": -1.223046898841858, "logits/rejected": -1.006250023841858, "logps/chosen": -0.7671874761581421, "logps/rejected": -1.7371094226837158, "loss": 0.9271, "nll_loss": 0.939453125, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07666015625, "rewards/margins": 0.09708404541015625, "rewards/rejected": -0.17375488579273224, "step": 6270 }, { "epoch": 0.4769318397569774, "grad_norm": 1.8745296500502175, "learning_rate": 1.0095089302501373e-06, "log_odds_chosen": 1.272119164466858, "log_odds_ratio": -0.43828123807907104, "logits/chosen": -1.2021484375, "logits/rejected": -1.0564453601837158, "logps/chosen": -0.7064453363418579, "logps/rejected": -1.560156226158142, "loss": 0.891, "nll_loss": 0.8359375, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07071533054113388, "rewards/margins": 0.08539123833179474, "rewards/rejected": -0.15615233778953552, "step": 6280 }, { "epoch": 0.477691285361686, "grad_norm": 1.7852428971764132, "learning_rate": 1.0087061397213787e-06, "log_odds_chosen": 1.503515601158142, "log_odds_ratio": -0.39111328125, "logits/chosen": -1.196874976158142, "logits/rejected": -1.0390625, "logps/chosen": -0.681640625, "logps/rejected": -1.751562476158142, "loss": 0.9018, "nll_loss": 0.8316406011581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06818847358226776, "rewards/margins": 0.10711669921875, "rewards/rejected": -0.17534179985523224, "step": 6290 }, { "epoch": 0.4784507309663945, "grad_norm": 1.7331762115353657, "learning_rate": 1.007905261357939e-06, "log_odds_chosen": 1.4280273914337158, "log_odds_ratio": -0.3942016661167145, "logits/chosen": -1.1287109851837158, "logits/rejected": -0.988476574420929, "logps/chosen": -0.628613293170929, "logps/rejected": -1.6003906726837158, "loss": 0.8827, "nll_loss": 0.8052734136581421, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0628662109375, "rewards/margins": 0.09711913764476776, "rewards/rejected": -0.1600341796875, "step": 6300 }, { "epoch": 0.4792101765711031, "grad_norm": 1.4778431906653542, "learning_rate": 1.0071062875808811e-06, "log_odds_chosen": 1.5597655773162842, "log_odds_ratio": -0.38847655057907104, "logits/chosen": -1.189062476158142, "logits/rejected": -0.9886718988418579, "logps/chosen": -0.693554699420929, "logps/rejected": -1.8591797351837158, "loss": 0.8947, "nll_loss": 0.8154296875, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06938476860523224, "rewards/margins": 0.116424560546875, "rewards/rejected": -0.18602295219898224, "step": 6310 }, { "epoch": 0.4799696221758117, "grad_norm": 1.9217796149641029, "learning_rate": 1.0063092108532552e-06, "log_odds_chosen": 1.4177734851837158, "log_odds_ratio": -0.42631834745407104, "logits/chosen": -1.2619140148162842, "logits/rejected": -1.0859375, "logps/chosen": -0.727246105670929, "logps/rejected": -1.751953125, "loss": 0.9229, "nll_loss": 0.852343738079071, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07275390625, "rewards/margins": 0.10246582329273224, "rewards/rejected": -0.17531737685203552, "step": 6320 }, { "epoch": 0.4807290677805202, "grad_norm": 1.557805618466395, "learning_rate": 1.005514023679802e-06, "log_odds_chosen": 1.334082007408142, "log_odds_ratio": -0.4222168028354645, "logits/chosen": -1.2986328601837158, "logits/rejected": -1.0847656726837158, "logps/chosen": -0.7300781011581421, "logps/rejected": -1.7039062976837158, "loss": 0.9077, "nll_loss": 0.8720703125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07301025092601776, "rewards/margins": 0.09730224311351776, "rewards/rejected": -0.17031249403953552, "step": 6330 }, { "epoch": 0.4814885133852288, "grad_norm": 2.008437065264603, "learning_rate": 1.0047207186066567e-06, "log_odds_chosen": 1.5925781726837158, "log_odds_ratio": -0.40473634004592896, "logits/chosen": -1.193359375, "logits/rejected": -0.9896484613418579, "logps/chosen": -0.746386706829071, "logps/rejected": -1.919921875, "loss": 0.8919, "nll_loss": 0.897265613079071, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07462158054113388, "rewards/margins": 0.11724853515625, "rewards/rejected": -0.19191893935203552, "step": 6340 }, { "epoch": 0.48224795898993733, "grad_norm": 2.0286362612422124, "learning_rate": 1.0039292882210538e-06, "log_odds_chosen": 1.4387695789337158, "log_odds_ratio": -0.42036134004592896, "logits/chosen": -1.163671851158142, "logits/rejected": -0.996874988079071, "logps/chosen": -0.720410168170929, "logps/rejected": -1.801367163658142, "loss": 0.9105, "nll_loss": 0.90576171875, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07199706882238388, "rewards/margins": 0.10804443061351776, "rewards/rejected": -0.18002930283546448, "step": 6350 }, { "epoch": 0.4830074045946459, "grad_norm": 1.8110345055934287, "learning_rate": 1.0031397251510382e-06, "log_odds_chosen": 1.720117211341858, "log_odds_ratio": -0.34912109375, "logits/chosen": -1.2626953125, "logits/rejected": -1.0564453601837158, "logps/chosen": -0.705078125, "logps/rejected": -1.960546851158142, "loss": 0.8948, "nll_loss": 0.8359375, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07048340141773224, "rewards/margins": 0.1253662109375, "rewards/rejected": -0.19584961235523224, "step": 6360 }, { "epoch": 0.48376685019935445, "grad_norm": 1.6815696261130768, "learning_rate": 1.0023520220651762e-06, "log_odds_chosen": 1.380761742591858, "log_odds_ratio": -0.41650390625, "logits/chosen": -1.2458984851837158, "logits/rejected": -1.026757836341858, "logps/chosen": -0.688281238079071, "logps/rejected": -1.6708984375, "loss": 0.9092, "nll_loss": 0.8951171636581421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06885986030101776, "rewards/margins": 0.09812011569738388, "rewards/rejected": -0.16701659560203552, "step": 6370 }, { "epoch": 0.48452629580406303, "grad_norm": 1.6072474545019382, "learning_rate": 1.0015661716722687e-06, "log_odds_chosen": 1.1504395008087158, "log_odds_ratio": -0.4600585997104645, "logits/chosen": -1.196679711341858, "logits/rejected": -1.0380859375, "logps/chosen": -0.7164062261581421, "logps/rejected": -1.5251953601837158, "loss": 0.9132, "nll_loss": 0.9072265625, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07163085788488388, "rewards/margins": 0.08078613132238388, "rewards/rejected": -0.15244141221046448, "step": 6380 }, { "epoch": 0.4852857414087716, "grad_norm": 1.5483944495539594, "learning_rate": 1.0007821667210687e-06, "log_odds_chosen": 1.120703101158142, "log_odds_ratio": -0.47700196504592896, "logits/chosen": -1.19140625, "logits/rejected": -0.963671863079071, "logps/chosen": -0.7738281488418579, "logps/rejected": -1.596093773841858, "loss": 0.9141, "nll_loss": 0.9111328125, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07745361328125, "rewards/margins": 0.08226318657398224, "rewards/rejected": -0.15974120795726776, "step": 6390 }, { "epoch": 0.48604518701348015, "grad_norm": 1.5428909598078062, "learning_rate": 1e-06, "log_odds_chosen": 1.408837914466858, "log_odds_ratio": -0.4095214903354645, "logits/chosen": -1.147070288658142, "logits/rejected": -0.9916015863418579, "logps/chosen": -0.7503906488418579, "logps/rejected": -1.766210913658142, "loss": 0.8751, "nll_loss": 0.8153320550918579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07509765774011612, "rewards/margins": 0.10159911960363388, "rewards/rejected": -0.17678222060203552, "step": 6400 }, { "epoch": 0.48680463261818874, "grad_norm": 1.572014188010316, "learning_rate": 9.992196643368784e-07, "log_odds_chosen": 1.4861328601837158, "log_odds_ratio": -0.39765626192092896, "logits/chosen": -1.2443358898162842, "logits/rejected": -1.0046875476837158, "logps/chosen": -0.7611328363418579, "logps/rejected": -1.814062476158142, "loss": 0.9133, "nll_loss": 0.912304699420929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07607422024011612, "rewards/margins": 0.10552978515625, "rewards/rejected": -0.181640625, "step": 6410 }, { "epoch": 0.48756407822289727, "grad_norm": 1.539044078662682, "learning_rate": 9.984411525986355e-07, "log_odds_chosen": 1.351709008216858, "log_odds_ratio": -0.4286132752895355, "logits/chosen": -1.2160155773162842, "logits/rejected": -1.044335961341858, "logps/chosen": -0.7554687261581421, "logps/rejected": -1.6818358898162842, "loss": 0.9062, "nll_loss": 0.8804687261581421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07548828423023224, "rewards/margins": 0.09268493950366974, "rewards/rejected": -0.16823729872703552, "step": 6420 }, { "epoch": 0.48832352382760585, "grad_norm": 2.159212204704787, "learning_rate": 9.97664457691046e-07, "log_odds_chosen": 1.363867163658142, "log_odds_ratio": -0.41923826932907104, "logits/chosen": -1.123632788658142, "logits/rejected": -1.0007812976837158, "logps/chosen": -0.661914050579071, "logps/rejected": -1.6171875, "loss": 0.8932, "nll_loss": 0.783203125, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06618652492761612, "rewards/margins": 0.09548339992761612, "rewards/rejected": -0.16169433295726776, "step": 6430 }, { "epoch": 0.4890829694323144, "grad_norm": 1.9280367536980059, "learning_rate": 9.968895725584535e-07, "log_odds_chosen": 1.2951171398162842, "log_odds_ratio": -0.4126953184604645, "logits/chosen": -1.1902344226837158, "logits/rejected": -1.072265625, "logps/chosen": -0.7139648199081421, "logps/rejected": -1.613867163658142, "loss": 0.9191, "nll_loss": 0.9287109375, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07138671725988388, "rewards/margins": 0.08994750678539276, "rewards/rejected": -0.16145019233226776, "step": 6440 }, { "epoch": 0.48984241503702297, "grad_norm": 1.8017948916195108, "learning_rate": 9.961164901835046e-07, "log_odds_chosen": 1.579931616783142, "log_odds_ratio": -0.39433592557907104, "logits/chosen": -1.2267577648162842, "logits/rejected": -1.031835913658142, "logps/chosen": -0.6836913824081421, "logps/rejected": -1.8103516101837158, "loss": 0.903, "nll_loss": 0.859375, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06834717094898224, "rewards/margins": 0.11259765923023224, "rewards/rejected": -0.18098144233226776, "step": 6450 }, { "epoch": 0.49060186064173156, "grad_norm": 1.7363092763810664, "learning_rate": 9.95345203586879e-07, "log_odds_chosen": 1.4088866710662842, "log_odds_ratio": -0.3971191346645355, "logits/chosen": -1.2224609851837158, "logits/rejected": -1.059960961341858, "logps/chosen": -0.6751953363418579, "logps/rejected": -1.646484375, "loss": 0.9015, "nll_loss": 0.844433605670929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06755371391773224, "rewards/margins": 0.09705200046300888, "rewards/rejected": -0.16455078125, "step": 6460 }, { "epoch": 0.4913613062464401, "grad_norm": 1.7907727060246756, "learning_rate": 9.94575705827027e-07, "log_odds_chosen": 1.4801757335662842, "log_odds_ratio": -0.41962891817092896, "logits/chosen": -1.1759765148162842, "logits/rejected": -0.979296863079071, "logps/chosen": -0.712207019329071, "logps/rejected": -1.758203148841858, "loss": 0.9027, "nll_loss": 0.908398449420929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07126464694738388, "rewards/margins": 0.10452880710363388, "rewards/rejected": -0.17568358778953552, "step": 6470 }, { "epoch": 0.4921207518511487, "grad_norm": 2.754086972221821, "learning_rate": 9.938079899999065e-07, "log_odds_chosen": 1.25732421875, "log_odds_ratio": -0.45637208223342896, "logits/chosen": -1.171289086341858, "logits/rejected": -1.0916016101837158, "logps/chosen": -0.696484386920929, "logps/rejected": -1.609375, "loss": 0.9135, "nll_loss": 0.8360351324081421, "rewards/accuracies": 0.75, "rewards/chosen": -0.0697021484375, "rewards/margins": 0.09130249172449112, "rewards/rejected": -0.160888671875, "step": 6480 }, { "epoch": 0.4928801974558572, "grad_norm": 1.7724700006892098, "learning_rate": 9.930420492387219e-07, "log_odds_chosen": 1.6203124523162842, "log_odds_ratio": -0.38037109375, "logits/chosen": -1.2478516101837158, "logits/rejected": -1.015625, "logps/chosen": -0.7020508050918579, "logps/rejected": -1.912109375, "loss": 0.8939, "nll_loss": 0.8076171875, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0701904296875, "rewards/margins": 0.12099609524011612, "rewards/rejected": -0.19125977158546448, "step": 6490 }, { "epoch": 0.4936396430605658, "grad_norm": 1.6454519714006688, "learning_rate": 9.922778767136676e-07, "log_odds_chosen": 1.438085913658142, "log_odds_ratio": -0.45947265625, "logits/chosen": -1.2216796875, "logits/rejected": -1.046289086341858, "logps/chosen": -0.676953136920929, "logps/rejected": -1.7470703125, "loss": 0.9284, "nll_loss": 0.8460937738418579, "rewards/accuracies": 0.71875, "rewards/chosen": -0.06768798828125, "rewards/margins": 0.10677947849035263, "rewards/rejected": -0.17458495497703552, "step": 6500 }, { "epoch": 0.4943990886652743, "grad_norm": 3.2901914403382992, "learning_rate": 9.915154656316713e-07, "log_odds_chosen": 1.2297852039337158, "log_odds_ratio": -0.4562011659145355, "logits/chosen": -1.2765624523162842, "logits/rejected": -1.1083984375, "logps/chosen": -0.7499023675918579, "logps/rejected": -1.641015648841858, "loss": 0.9023, "nll_loss": 0.8580077886581421, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07500000298023224, "rewards/margins": 0.08905639499425888, "rewards/rejected": -0.16408690810203552, "step": 6510 }, { "epoch": 0.4951585342699829, "grad_norm": 1.9569842511606652, "learning_rate": 9.907548092361398e-07, "log_odds_chosen": 1.432226538658142, "log_odds_ratio": -0.4278320372104645, "logits/chosen": -1.114648461341858, "logits/rejected": -0.974414050579071, "logps/chosen": -0.70703125, "logps/rejected": -1.748632788658142, "loss": 0.9147, "nll_loss": 0.9267578125, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07073974609375, "rewards/margins": 0.10433349758386612, "rewards/rejected": -0.17507323622703552, "step": 6520 }, { "epoch": 0.4959179798746915, "grad_norm": 2.2023220777508685, "learning_rate": 9.899959008067097e-07, "log_odds_chosen": 1.1810791492462158, "log_odds_ratio": -0.532519519329071, "logits/chosen": -1.192968726158142, "logits/rejected": -1.066015601158142, "logps/chosen": -0.76806640625, "logps/rejected": -1.605078101158142, "loss": 0.9208, "nll_loss": 0.845898449420929, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07683105766773224, "rewards/margins": 0.08373947441577911, "rewards/rejected": -0.16059570014476776, "step": 6530 }, { "epoch": 0.4966774254794, "grad_norm": 1.8148382864442336, "learning_rate": 9.892387336589959e-07, "log_odds_chosen": 1.29345703125, "log_odds_ratio": -0.4363769590854645, "logits/chosen": -1.1435546875, "logits/rejected": -1.039453148841858, "logps/chosen": -0.739453136920929, "logps/rejected": -1.662109375, "loss": 0.9279, "nll_loss": 0.9400390386581421, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07391357421875, "rewards/margins": 0.092315673828125, "rewards/rejected": -0.16623535752296448, "step": 6540 }, { "epoch": 0.4974368710841086, "grad_norm": 1.6904800286627018, "learning_rate": 9.884833011443446e-07, "log_odds_chosen": 1.380859375, "log_odds_ratio": -0.427001953125, "logits/chosen": -1.2283203601837158, "logits/rejected": -1.0011718273162842, "logps/chosen": -0.68603515625, "logps/rejected": -1.6896483898162842, "loss": 0.9021, "nll_loss": 0.8609374761581421, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.06865234673023224, "rewards/margins": 0.10017089545726776, "rewards/rejected": -0.16887207329273224, "step": 6550 }, { "epoch": 0.49819631668881714, "grad_norm": 1.746395416084233, "learning_rate": 9.877295966495897e-07, "log_odds_chosen": 1.2960937023162842, "log_odds_ratio": -0.42255860567092896, "logits/chosen": -1.175390601158142, "logits/rejected": -1.058691382408142, "logps/chosen": -0.7193359136581421, "logps/rejected": -1.634765625, "loss": 0.9158, "nll_loss": 0.943359375, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07191161811351776, "rewards/margins": 0.0914306640625, "rewards/rejected": -0.16330567002296448, "step": 6560 }, { "epoch": 0.49895576229352573, "grad_norm": 1.9384476854592174, "learning_rate": 9.86977613596807e-07, "log_odds_chosen": 1.193115234375, "log_odds_ratio": -0.4940429627895355, "logits/chosen": -1.194921851158142, "logits/rejected": -1.0632812976837158, "logps/chosen": -0.767382800579071, "logps/rejected": -1.6484375, "loss": 0.8729, "nll_loss": 0.786328136920929, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.07677002251148224, "rewards/margins": 0.08810119330883026, "rewards/rejected": -0.1649169921875, "step": 6570 }, { "epoch": 0.4997152078982343, "grad_norm": 1.7796220985404294, "learning_rate": 9.862273454430757e-07, "log_odds_chosen": 1.3638184070587158, "log_odds_ratio": -0.41748046875, "logits/chosen": -1.2068359851837158, "logits/rejected": -0.981640636920929, "logps/chosen": -0.7230468988418579, "logps/rejected": -1.6876952648162842, "loss": 0.9066, "nll_loss": 0.8228515386581421, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07230224460363388, "rewards/margins": 0.09642181545495987, "rewards/rejected": -0.16887207329273224, "step": 6580 }, { "epoch": 0.5004746535029428, "grad_norm": 1.6876342031984084, "learning_rate": 9.85478785680238e-07, "log_odds_chosen": 1.3330078125, "log_odds_ratio": -0.43964844942092896, "logits/chosen": -1.267578125, "logits/rejected": -1.0285155773162842, "logps/chosen": -0.7466796636581421, "logps/rejected": -1.7039062976837158, "loss": 0.9028, "nll_loss": 0.836718738079071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07473144680261612, "rewards/margins": 0.09578857570886612, "rewards/rejected": -0.17050781846046448, "step": 6590 }, { "epoch": 0.5012340991076514, "grad_norm": 1.7106131855237259, "learning_rate": 9.847319278346618e-07, "log_odds_chosen": 1.35394287109375, "log_odds_ratio": -0.39765626192092896, "logits/chosen": -1.2380859851837158, "logits/rejected": -1.0544922351837158, "logps/chosen": -0.70703125, "logps/rejected": -1.6630859375, "loss": 0.9026, "nll_loss": 0.9105468988418579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07069091498851776, "rewards/margins": 0.09571532905101776, "rewards/rejected": -0.16645507514476776, "step": 6600 }, { "epoch": 0.50199354471236, "grad_norm": 1.5998573323618346, "learning_rate": 9.839867654670063e-07, "log_odds_chosen": 1.446313500404358, "log_odds_ratio": -0.4229980409145355, "logits/chosen": -1.185546875, "logits/rejected": -1.0314452648162842, "logps/chosen": -0.705273449420929, "logps/rejected": -1.7556641101837158, "loss": 0.9017, "nll_loss": 0.8231445550918579, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07049560546875, "rewards/margins": 0.10494079440832138, "rewards/rejected": -0.17543944716453552, "step": 6610 }, { "epoch": 0.5027529903170685, "grad_norm": 1.8264443592835182, "learning_rate": 9.832432921719876e-07, "log_odds_chosen": 1.4728515148162842, "log_odds_ratio": -0.3629394471645355, "logits/chosen": -1.1759765148162842, "logits/rejected": -1.0095703601837158, "logps/chosen": -0.664843738079071, "logps/rejected": -1.682031273841858, "loss": 0.8997, "nll_loss": 0.8802734613418579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06645508110523224, "rewards/margins": 0.10178222507238388, "rewards/rejected": -0.168212890625, "step": 6620 }, { "epoch": 0.5035124359217771, "grad_norm": 1.4381486391709064, "learning_rate": 9.825015015781493e-07, "log_odds_chosen": 1.4621093273162842, "log_odds_ratio": -0.3807128965854645, "logits/chosen": -1.241601586341858, "logits/rejected": -1.056054711341858, "logps/chosen": -0.675976574420929, "logps/rejected": -1.678125023841858, "loss": 0.882, "nll_loss": 0.814648449420929, "rewards/accuracies": 0.84375, "rewards/chosen": -0.06761474907398224, "rewards/margins": 0.10015869140625, "rewards/rejected": -0.1678466796875, "step": 6630 }, { "epoch": 0.5042718815264856, "grad_norm": 2.0568765529786557, "learning_rate": 9.81761387347632e-07, "log_odds_chosen": 1.421875, "log_odds_ratio": -0.3936523497104645, "logits/chosen": -1.2140624523162842, "logits/rejected": -1.065039038658142, "logps/chosen": -0.6751953363418579, "logps/rejected": -1.6845703125, "loss": 0.8862, "nll_loss": 0.85986328125, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06752929836511612, "rewards/margins": 0.10097046196460724, "rewards/rejected": -0.16853027045726776, "step": 6640 }, { "epoch": 0.5050313271311943, "grad_norm": 1.9618373809365839, "learning_rate": 9.810229431759452e-07, "log_odds_chosen": 1.53857421875, "log_odds_ratio": -0.39387208223342896, "logits/chosen": -1.172460913658142, "logits/rejected": -0.992382824420929, "logps/chosen": -0.728710949420929, "logps/rejected": -1.8582031726837158, "loss": 0.8812, "nll_loss": 0.901171863079071, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0728759765625, "rewards/margins": 0.11296997219324112, "rewards/rejected": -0.18574218451976776, "step": 6650 }, { "epoch": 0.5057907727359028, "grad_norm": 2.9646172456831903, "learning_rate": 9.802861627917437e-07, "log_odds_chosen": 1.7106444835662842, "log_odds_ratio": -0.38652342557907104, "logits/chosen": -1.2087891101837158, "logits/rejected": -0.969531238079071, "logps/chosen": -0.6805664300918579, "logps/rejected": -1.9296875, "loss": 0.8998, "nll_loss": 0.8990234136581421, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06801757961511612, "rewards/margins": 0.12476806342601776, "rewards/rejected": -0.19284668564796448, "step": 6660 }, { "epoch": 0.5065502183406113, "grad_norm": 1.526991026569178, "learning_rate": 9.795510399566016e-07, "log_odds_chosen": 1.3603515625, "log_odds_ratio": -0.42578125, "logits/chosen": -1.2345702648162842, "logits/rejected": -1.0226562023162842, "logps/chosen": -0.729785144329071, "logps/rejected": -1.6749999523162842, "loss": 0.9136, "nll_loss": 0.8306640386581421, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07298584282398224, "rewards/margins": 0.094390869140625, "rewards/rejected": -0.16752929985523224, "step": 6670 }, { "epoch": 0.50730966394532, "grad_norm": 1.8353035283582346, "learning_rate": 9.788175684647926e-07, "log_odds_chosen": 1.313562035560608, "log_odds_ratio": -0.4651855528354645, "logits/chosen": -1.2189452648162842, "logits/rejected": -1.0730469226837158, "logps/chosen": -0.689257800579071, "logps/rejected": -1.6095702648162842, "loss": 0.8974, "nll_loss": 0.8724609613418579, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0689697265625, "rewards/margins": 0.09212951362133026, "rewards/rejected": -0.16096191108226776, "step": 6680 }, { "epoch": 0.5080691095500285, "grad_norm": 1.6248766206048961, "learning_rate": 9.780857421430687e-07, "log_odds_chosen": 1.639074683189392, "log_odds_ratio": -0.40087890625, "logits/chosen": -1.174218773841858, "logits/rejected": -1.0505859851837158, "logps/chosen": -0.7020508050918579, "logps/rejected": -1.883203148841858, "loss": 0.9036, "nll_loss": 0.8042968511581421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07030029594898224, "rewards/margins": 0.11790008842945099, "rewards/rejected": -0.18813475966453552, "step": 6690 }, { "epoch": 0.508828555154737, "grad_norm": 1.918160018227594, "learning_rate": 9.773555548504417e-07, "log_odds_chosen": 1.2370116710662842, "log_odds_ratio": -0.4559082090854645, "logits/chosen": -1.2126953601837158, "logits/rejected": -1.0244140625, "logps/chosen": -0.7276366949081421, "logps/rejected": -1.571874976158142, "loss": 0.9053, "nll_loss": 0.8731445074081421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07279052585363388, "rewards/margins": 0.08442993462085724, "rewards/rejected": -0.15720215439796448, "step": 6700 }, { "epoch": 0.5095880007594457, "grad_norm": 1.568199667301049, "learning_rate": 9.76627000477968e-07, "log_odds_chosen": 1.47607421875, "log_odds_ratio": -0.37822264432907104, "logits/chosen": -1.233984351158142, "logits/rejected": -1.096093773841858, "logps/chosen": -0.7235351800918579, "logps/rejected": -1.7707030773162842, "loss": 0.8945, "nll_loss": 0.9234374761581421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07243652641773224, "rewards/margins": 0.10465087741613388, "rewards/rejected": -0.17705078423023224, "step": 6710 }, { "epoch": 0.5103474463641542, "grad_norm": 1.541010993198088, "learning_rate": 9.75900072948533e-07, "log_odds_chosen": 1.524072289466858, "log_odds_ratio": -0.3948730528354645, "logits/chosen": -1.2177734375, "logits/rejected": -1.0265624523162842, "logps/chosen": -0.717578113079071, "logps/rejected": -1.8312499523162842, "loss": 0.906, "nll_loss": 0.8853515386581421, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07171630859375, "rewards/margins": 0.11144409328699112, "rewards/rejected": -0.18295899033546448, "step": 6720 }, { "epoch": 0.5111068919688627, "grad_norm": 1.8925844286932274, "learning_rate": 9.751747662166388e-07, "log_odds_chosen": 1.2253906726837158, "log_odds_ratio": -0.4520019590854645, "logits/chosen": -1.305078148841858, "logits/rejected": -1.1123046875, "logps/chosen": -0.7286132574081421, "logps/rejected": -1.624609351158142, "loss": 0.8951, "nll_loss": 0.888867199420929, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07283935695886612, "rewards/margins": 0.08955688774585724, "rewards/rejected": -0.1624755859375, "step": 6730 }, { "epoch": 0.5118663375735713, "grad_norm": 1.5219377801240588, "learning_rate": 9.744510742681917e-07, "log_odds_chosen": 1.4801757335662842, "log_odds_ratio": -0.3872314393520355, "logits/chosen": -1.138281226158142, "logits/rejected": -0.982617199420929, "logps/chosen": -0.6712890863418579, "logps/rejected": -1.726171851158142, "loss": 0.8983, "nll_loss": 0.828125, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06718750298023224, "rewards/margins": 0.10545043647289276, "rewards/rejected": -0.17255859076976776, "step": 6740 }, { "epoch": 0.5126257831782799, "grad_norm": 1.5633390017514532, "learning_rate": 9.737289911202953e-07, "log_odds_chosen": 1.0029296875, "log_odds_ratio": -0.540576159954071, "logits/chosen": -1.1457030773162842, "logits/rejected": -1.0595703125, "logps/chosen": -0.7982422113418579, "logps/rejected": -1.5187499523162842, "loss": 0.9225, "nll_loss": 0.8763672113418579, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07984618842601776, "rewards/margins": 0.07212524116039276, "rewards/rejected": -0.15192870795726776, "step": 6750 }, { "epoch": 0.5133852287829884, "grad_norm": 1.4639108219139865, "learning_rate": 9.730085108210398e-07, "log_odds_chosen": 1.4176757335662842, "log_odds_ratio": -0.4161621034145355, "logits/chosen": -1.1873047351837158, "logits/rejected": -0.961718738079071, "logps/chosen": -0.655957043170929, "logps/rejected": -1.6671874523162842, "loss": 0.9072, "nll_loss": 0.7939453125, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06563720852136612, "rewards/margins": 0.10101928561925888, "rewards/rejected": -0.16669921576976776, "step": 6760 }, { "epoch": 0.514144674387697, "grad_norm": 2.628308776689863, "learning_rate": 9.72289627449298e-07, "log_odds_chosen": 1.5968749523162842, "log_odds_ratio": -0.356201171875, "logits/chosen": -1.2150390148162842, "logits/rejected": -1.023046851158142, "logps/chosen": -0.6700195074081421, "logps/rejected": -1.8230469226837158, "loss": 0.8798, "nll_loss": 0.8447265625, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.0670166015625, "rewards/margins": 0.11537475883960724, "rewards/rejected": -0.18239745497703552, "step": 6770 }, { "epoch": 0.5149041199924056, "grad_norm": 1.8531368786331657, "learning_rate": 9.715723351145206e-07, "log_odds_chosen": 1.354101538658142, "log_odds_ratio": -0.41728514432907104, "logits/chosen": -1.18359375, "logits/rejected": -1.010156273841858, "logps/chosen": -0.71875, "logps/rejected": -1.6640625, "loss": 0.8755, "nll_loss": 0.851757824420929, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07188721001148224, "rewards/margins": 0.09453125298023224, "rewards/rejected": -0.16647949814796448, "step": 6780 }, { "epoch": 0.5156635655971141, "grad_norm": 1.5764679171730838, "learning_rate": 9.70856627956532e-07, "log_odds_chosen": 1.57421875, "log_odds_ratio": -0.35856932401657104, "logits/chosen": -1.115234375, "logits/rejected": -0.9986327886581421, "logps/chosen": -0.6361328363418579, "logps/rejected": -1.7117187976837158, "loss": 0.8927, "nll_loss": 0.8882812261581421, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06357421725988388, "rewards/margins": 0.10753784328699112, "rewards/rejected": -0.17119140923023224, "step": 6790 }, { "epoch": 0.5164230112018227, "grad_norm": 1.593000722226936, "learning_rate": 9.701425001453318e-07, "log_odds_chosen": 1.3097655773162842, "log_odds_ratio": -0.45673829317092896, "logits/chosen": -1.256250023841858, "logits/rejected": -1.099218726158142, "logps/chosen": -0.685742199420929, "logps/rejected": -1.6085937023162842, "loss": 0.8964, "nll_loss": 0.831250011920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06856689602136612, "rewards/margins": 0.09227295219898224, "rewards/rejected": -0.16079100966453552, "step": 6800 }, { "epoch": 0.5171824568065312, "grad_norm": 1.8282105720124027, "learning_rate": 9.694299458808932e-07, "log_odds_chosen": 1.389257788658142, "log_odds_ratio": -0.4120117127895355, "logits/chosen": -1.2976562976837158, "logits/rejected": -1.069726586341858, "logps/chosen": -0.69580078125, "logps/rejected": -1.6710937023162842, "loss": 0.8923, "nll_loss": 0.7822265625, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06956787407398224, "rewards/margins": 0.09750976413488388, "rewards/rejected": -0.16704101860523224, "step": 6810 }, { "epoch": 0.5179419024112398, "grad_norm": 1.563614894830347, "learning_rate": 9.687189593929655e-07, "log_odds_chosen": 1.3258788585662842, "log_odds_ratio": -0.47612303495407104, "logits/chosen": -1.0998046398162842, "logits/rejected": -1.0056641101837158, "logps/chosen": -0.71533203125, "logps/rejected": -1.696679711341858, "loss": 0.8886, "nll_loss": 0.814257800579071, "rewards/accuracies": 0.75, "rewards/chosen": -0.07158203423023224, "rewards/margins": 0.09814453125, "rewards/rejected": -0.16965332627296448, "step": 6820 }, { "epoch": 0.5187013480159484, "grad_norm": 1.7390457607762546, "learning_rate": 9.680095349408789e-07, "log_odds_chosen": 1.470312476158142, "log_odds_ratio": -0.4188476502895355, "logits/chosen": -1.202734351158142, "logits/rejected": -1.023828148841858, "logps/chosen": -0.701367199420929, "logps/rejected": -1.726953148841858, "loss": 0.9199, "nll_loss": 0.8578125238418579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07017822563648224, "rewards/margins": 0.10244140774011612, "rewards/rejected": -0.17255859076976776, "step": 6830 }, { "epoch": 0.5194607936206569, "grad_norm": 2.418104297080795, "learning_rate": 9.673016668133487e-07, "log_odds_chosen": 1.4831054210662842, "log_odds_ratio": -0.41557615995407104, "logits/chosen": -1.2351562976837158, "logits/rejected": -1.0558593273162842, "logps/chosen": -0.69921875, "logps/rejected": -1.7560546398162842, "loss": 0.8926, "nll_loss": 0.883007824420929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06990966945886612, "rewards/margins": 0.10565185546875, "rewards/rejected": -0.17563477158546448, "step": 6840 }, { "epoch": 0.5202202392253655, "grad_norm": 2.085158469600785, "learning_rate": 9.66595349328283e-07, "log_odds_chosen": 1.338964819908142, "log_odds_ratio": -0.42426759004592896, "logits/chosen": -1.186132788658142, "logits/rejected": -1.021484375, "logps/chosen": -0.68896484375, "logps/rejected": -1.6398437023162842, "loss": 0.8971, "nll_loss": 0.8358398675918579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06892089545726776, "rewards/margins": 0.09503173828125, "rewards/rejected": -0.16396483778953552, "step": 6850 }, { "epoch": 0.5209796848300741, "grad_norm": 1.5451216161378996, "learning_rate": 9.658905768325902e-07, "log_odds_chosen": 1.478124976158142, "log_odds_ratio": -0.415771484375, "logits/chosen": -1.2449219226837158, "logits/rejected": -1.057226538658142, "logps/chosen": -0.6982421875, "logps/rejected": -1.771484375, "loss": 0.8907, "nll_loss": 0.8228515386581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06986083835363388, "rewards/margins": 0.10721435397863388, "rewards/rejected": -0.17697754502296448, "step": 6860 }, { "epoch": 0.5217391304347826, "grad_norm": 2.0512204640982143, "learning_rate": 9.651873437019902e-07, "log_odds_chosen": 1.647070288658142, "log_odds_ratio": -0.385009765625, "logits/chosen": -1.192968726158142, "logits/rejected": -0.993945300579071, "logps/chosen": -0.723828136920929, "logps/rejected": -1.947656273841858, "loss": 0.8986, "nll_loss": 0.883593738079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07236327975988388, "rewards/margins": 0.12252197414636612, "rewards/rejected": -0.19482421875, "step": 6870 }, { "epoch": 0.5224985760394911, "grad_norm": 1.8259951873765816, "learning_rate": 9.644856443408243e-07, "log_odds_chosen": 1.313574194908142, "log_odds_ratio": -0.44062501192092896, "logits/chosen": -1.197265625, "logits/rejected": -1.0529296398162842, "logps/chosen": -0.6698242425918579, "logps/rejected": -1.611914038658142, "loss": 0.8982, "nll_loss": 0.811718761920929, "rewards/accuracies": 0.75, "rewards/chosen": -0.06700439751148224, "rewards/margins": 0.094085693359375, "rewards/rejected": -0.16103515028953552, "step": 6880 }, { "epoch": 0.5232580216441998, "grad_norm": 1.682410407094835, "learning_rate": 9.637854731818697e-07, "log_odds_chosen": 1.376953125, "log_odds_ratio": -0.407958984375, "logits/chosen": -1.120507836341858, "logits/rejected": -1.052343726158142, "logps/chosen": -0.669238269329071, "logps/rejected": -1.623046875, "loss": 0.9082, "nll_loss": 0.8280273675918579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06688232719898224, "rewards/margins": 0.09536132961511612, "rewards/rejected": -0.16220703721046448, "step": 6890 }, { "epoch": 0.5240174672489083, "grad_norm": 1.614378114584295, "learning_rate": 9.630868246861536e-07, "log_odds_chosen": 1.3494141101837158, "log_odds_ratio": -0.41389161348342896, "logits/chosen": -1.2205078601837158, "logits/rejected": -0.9990234375, "logps/chosen": -0.641308605670929, "logps/rejected": -1.5714843273162842, "loss": 0.9148, "nll_loss": 0.819531261920929, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06417236477136612, "rewards/margins": 0.09290160983800888, "rewards/rejected": -0.15708008408546448, "step": 6900 }, { "epoch": 0.5247769128536168, "grad_norm": 2.131055969994665, "learning_rate": 9.623896933427685e-07, "log_odds_chosen": 1.4792969226837158, "log_odds_ratio": -0.38496094942092896, "logits/chosen": -1.286523461341858, "logits/rejected": -1.0880858898162842, "logps/chosen": -0.692675769329071, "logps/rejected": -1.7488281726837158, "loss": 0.902, "nll_loss": 0.8306640386581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06926269829273224, "rewards/margins": 0.10546875, "rewards/rejected": -0.17463378608226776, "step": 6910 }, { "epoch": 0.5255363584583255, "grad_norm": 1.5760234859879454, "learning_rate": 9.6169407366869e-07, "log_odds_chosen": 1.365869164466858, "log_odds_ratio": -0.4061523377895355, "logits/chosen": -1.248632788658142, "logits/rejected": -1.0998046398162842, "logps/chosen": -0.6644531488418579, "logps/rejected": -1.6203124523162842, "loss": 0.9008, "nll_loss": 0.884570300579071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06645508110523224, "rewards/margins": 0.09551544487476349, "rewards/rejected": -0.16191406548023224, "step": 6920 }, { "epoch": 0.526295804063034, "grad_norm": 1.82630892178093, "learning_rate": 9.609999602085963e-07, "log_odds_chosen": 1.687890648841858, "log_odds_ratio": -0.33940428495407104, "logits/chosen": -1.262109398841858, "logits/rejected": -1.014062523841858, "logps/chosen": -0.654101550579071, "logps/rejected": -1.8289062976837158, "loss": 0.8933, "nll_loss": 0.779296875, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06538085639476776, "rewards/margins": 0.11759033054113388, "rewards/rejected": -0.18295899033546448, "step": 6930 }, { "epoch": 0.5270552496677425, "grad_norm": 1.5768252536068934, "learning_rate": 9.603073475346872e-07, "log_odds_chosen": 1.44580078125, "log_odds_ratio": -0.40791016817092896, "logits/chosen": -1.333398461341858, "logits/rejected": -1.1318359375, "logps/chosen": -0.675976574420929, "logps/rejected": -1.7000000476837158, "loss": 0.8875, "nll_loss": 0.817578136920929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06755371391773224, "rewards/margins": 0.10255737602710724, "rewards/rejected": -0.17019042372703552, "step": 6940 }, { "epoch": 0.5278146952724511, "grad_norm": 1.8182098878678983, "learning_rate": 9.596162302465074e-07, "log_odds_chosen": 1.259521484375, "log_odds_ratio": -0.4354492127895355, "logits/chosen": -1.202539086341858, "logits/rejected": -1.0505859851837158, "logps/chosen": -0.763867199420929, "logps/rejected": -1.668359398841858, "loss": 0.9083, "nll_loss": 0.904492199420929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07650146633386612, "rewards/margins": 0.0905914306640625, "rewards/rejected": -0.16689452528953552, "step": 6950 }, { "epoch": 0.5285741408771597, "grad_norm": 1.6306473213604655, "learning_rate": 9.589266029707683e-07, "log_odds_chosen": 1.221337914466858, "log_odds_ratio": -0.4588378965854645, "logits/chosen": -1.202734351158142, "logits/rejected": -1.0958983898162842, "logps/chosen": -0.6991211175918579, "logps/rejected": -1.586523413658142, "loss": 0.8966, "nll_loss": 0.8404296636581421, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06995849311351776, "rewards/margins": 0.08879242092370987, "rewards/rejected": -0.15864257514476776, "step": 6960 }, { "epoch": 0.5293335864818682, "grad_norm": 1.5364482281744736, "learning_rate": 9.582384603611731e-07, "log_odds_chosen": 1.6806640625, "log_odds_ratio": -0.35332030057907104, "logits/chosen": -1.1863281726837158, "logits/rejected": -1.012109398841858, "logps/chosen": -0.744140625, "logps/rejected": -1.96484375, "loss": 0.8919, "nll_loss": 0.927929699420929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07438965141773224, "rewards/margins": 0.12199707329273224, "rewards/rejected": -0.19633789360523224, "step": 6970 }, { "epoch": 0.5300930320865768, "grad_norm": 1.5724936000479353, "learning_rate": 9.575517970982428e-07, "log_odds_chosen": 1.43017578125, "log_odds_ratio": -0.41679686307907104, "logits/chosen": -1.1533203125, "logits/rejected": -1.012109398841858, "logps/chosen": -0.685546875, "logps/rejected": -1.720703125, "loss": 0.8793, "nll_loss": 0.8609374761581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06855468451976776, "rewards/margins": 0.10352782905101776, "rewards/rejected": -0.17207030951976776, "step": 6980 }, { "epoch": 0.5308524776912854, "grad_norm": 1.7842835334018885, "learning_rate": 9.568666078891436e-07, "log_odds_chosen": 1.381860375404358, "log_odds_ratio": -0.40375977754592896, "logits/chosen": -1.167578101158142, "logits/rejected": -0.976757824420929, "logps/chosen": -0.7103515863418579, "logps/rejected": -1.6787109375, "loss": 0.907, "nll_loss": 0.845410168170929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07103271782398224, "rewards/margins": 0.09679718315601349, "rewards/rejected": -0.1678466796875, "step": 6990 }, { "epoch": 0.5316119232959939, "grad_norm": 1.8051398889220507, "learning_rate": 9.561828874675149e-07, "log_odds_chosen": 1.351770043373108, "log_odds_ratio": -0.4705566465854645, "logits/chosen": -1.21484375, "logits/rejected": -1.044531226158142, "logps/chosen": -0.7396484613418579, "logps/rejected": -1.692968726158142, "loss": 0.8865, "nll_loss": 0.818164050579071, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07390137016773224, "rewards/margins": 0.09537811577320099, "rewards/rejected": -0.1693115234375, "step": 7000 }, { "epoch": 0.5323713689007025, "grad_norm": 3.1268625592162587, "learning_rate": 9.555006305933e-07, "log_odds_chosen": 1.521875023841858, "log_odds_ratio": -0.3990234434604645, "logits/chosen": -1.1990234851837158, "logits/rejected": -1.076757788658142, "logps/chosen": -0.685742199420929, "logps/rejected": -1.801171898841858, "loss": 0.8971, "nll_loss": 0.8160156011581421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06857910007238388, "rewards/margins": 0.11149902641773224, "rewards/rejected": -0.18017578125, "step": 7010 }, { "epoch": 0.533130814505411, "grad_norm": 1.6571708552136055, "learning_rate": 9.548198320525771e-07, "log_odds_chosen": 1.5271484851837158, "log_odds_ratio": -0.38525390625, "logits/chosen": -1.1916015148162842, "logits/rejected": -0.9703124761581421, "logps/chosen": -0.6324218511581421, "logps/rejected": -1.6863281726837158, "loss": 0.8816, "nll_loss": 0.8374999761581421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06329345703125, "rewards/margins": 0.10543213039636612, "rewards/rejected": -0.16855469346046448, "step": 7020 }, { "epoch": 0.5338902601101196, "grad_norm": 1.8977718561509693, "learning_rate": 9.54140486657392e-07, "log_odds_chosen": 1.680273413658142, "log_odds_ratio": -0.36572265625, "logits/chosen": -1.1531250476837158, "logits/rejected": -1.0041015148162842, "logps/chosen": -0.7093750238418579, "logps/rejected": -1.951171875, "loss": 0.8802, "nll_loss": 0.86962890625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0709228515625, "rewards/margins": 0.12406005710363388, "rewards/rejected": -0.19511719048023224, "step": 7030 }, { "epoch": 0.5346497057148282, "grad_norm": 1.6359049353720865, "learning_rate": 9.534625892455922e-07, "log_odds_chosen": 1.4940917491912842, "log_odds_ratio": -0.4132324159145355, "logits/chosen": -1.2453124523162842, "logits/rejected": -1.0099608898162842, "logps/chosen": -0.71240234375, "logps/rejected": -1.781640648841858, "loss": 0.9169, "nll_loss": 0.8057616949081421, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07122802734375, "rewards/margins": 0.10686035454273224, "rewards/rejected": -0.1781005859375, "step": 7040 }, { "epoch": 0.5354091513195367, "grad_norm": 1.8923193593249674, "learning_rate": 9.527861346806618e-07, "log_odds_chosen": 1.388671875, "log_odds_ratio": -0.429931640625, "logits/chosen": -1.3058593273162842, "logits/rejected": -1.0554687976837158, "logps/chosen": -0.740234375, "logps/rejected": -1.708398461341858, "loss": 0.9027, "nll_loss": 0.849414050579071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07406006008386612, "rewards/margins": 0.09671630710363388, "rewards/rejected": -0.17058105766773224, "step": 7050 }, { "epoch": 0.5361685969242453, "grad_norm": 1.490693992351195, "learning_rate": 9.521111178515582e-07, "log_odds_chosen": 1.490820288658142, "log_odds_ratio": -0.4102539122104645, "logits/chosen": -1.159765601158142, "logits/rejected": -0.9755859375, "logps/chosen": -0.6973632574081421, "logps/rejected": -1.7658202648162842, "loss": 0.8894, "nll_loss": 0.9447265863418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06975097954273224, "rewards/margins": 0.10682983696460724, "rewards/rejected": -0.176513671875, "step": 7060 }, { "epoch": 0.5369280425289539, "grad_norm": 1.5964963876714084, "learning_rate": 9.514375336725502e-07, "log_odds_chosen": 1.398828148841858, "log_odds_ratio": -0.38725584745407104, "logits/chosen": -1.2314453125, "logits/rejected": -1.013671875, "logps/chosen": -0.6844726800918579, "logps/rejected": -1.636328101158142, "loss": 0.9017, "nll_loss": 0.8995116949081421, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06851806491613388, "rewards/margins": 0.095184326171875, "rewards/rejected": -0.16364745795726776, "step": 7070 }, { "epoch": 0.5376874881336624, "grad_norm": 2.4361150993571608, "learning_rate": 9.507653770830566e-07, "log_odds_chosen": 1.38818359375, "log_odds_ratio": -0.4112792909145355, "logits/chosen": -1.092187523841858, "logits/rejected": -0.9351562261581421, "logps/chosen": -0.7510741949081421, "logps/rejected": -1.7626953125, "loss": 0.9236, "nll_loss": 0.9287109375, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07517089694738388, "rewards/margins": 0.10114135593175888, "rewards/rejected": -0.17626953125, "step": 7080 }, { "epoch": 0.5384469337383709, "grad_norm": 1.4275489996583857, "learning_rate": 9.500946430474869e-07, "log_odds_chosen": 1.619726538658142, "log_odds_ratio": -0.33979493379592896, "logits/chosen": -1.3201172351837158, "logits/rejected": -1.0632812976837158, "logps/chosen": -0.6597656011581421, "logps/rejected": -1.777734398841858, "loss": 0.8873, "nll_loss": 0.798828125, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.0660400390625, "rewards/margins": 0.11191406100988388, "rewards/rejected": -0.17795410752296448, "step": 7090 }, { "epoch": 0.5392063793430796, "grad_norm": 2.102101021656428, "learning_rate": 9.494253265550825e-07, "log_odds_chosen": 1.428613305091858, "log_odds_ratio": -0.4180664122104645, "logits/chosen": -1.263085961341858, "logits/rejected": -1.021484375, "logps/chosen": -0.6998046636581421, "logps/rejected": -1.7175781726837158, "loss": 0.8841, "nll_loss": 0.8070312738418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.0699462890625, "rewards/margins": 0.10187987983226776, "rewards/rejected": -0.17182616889476776, "step": 7100 }, { "epoch": 0.5399658249477881, "grad_norm": 1.7563548420755062, "learning_rate": 9.4875742261976e-07, "log_odds_chosen": 1.369726538658142, "log_odds_ratio": -0.44428712129592896, "logits/chosen": -1.248437523841858, "logits/rejected": -1.0652344226837158, "logps/chosen": -0.753710925579071, "logps/rejected": -1.782812476158142, "loss": 0.8865, "nll_loss": 0.883593738079071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07537841796875, "rewards/margins": 0.10289306938648224, "rewards/rejected": -0.17836913466453552, "step": 7110 }, { "epoch": 0.5407252705524966, "grad_norm": 1.7063589663847534, "learning_rate": 9.480909262799544e-07, "log_odds_chosen": 1.540429711341858, "log_odds_ratio": -0.4129638671875, "logits/chosen": -1.19140625, "logits/rejected": -1.049414038658142, "logps/chosen": -0.730664074420929, "logps/rejected": -1.8699219226837158, "loss": 0.8713, "nll_loss": 0.8565429449081421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07304687798023224, "rewards/margins": 0.113861083984375, "rewards/rejected": -0.18681640923023224, "step": 7120 }, { "epoch": 0.5414847161572053, "grad_norm": 1.704834603520489, "learning_rate": 9.47425832598465e-07, "log_odds_chosen": 1.626367211341858, "log_odds_ratio": -0.39042967557907104, "logits/chosen": -1.134374976158142, "logits/rejected": -1.042382836341858, "logps/chosen": -0.716015636920929, "logps/rejected": -1.908203125, "loss": 0.8875, "nll_loss": 0.841601550579071, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07158203423023224, "rewards/margins": 0.11943359673023224, "rewards/rejected": -0.19096679985523224, "step": 7130 }, { "epoch": 0.5422441617619138, "grad_norm": 1.9799101766887692, "learning_rate": 9.467621366623017e-07, "log_odds_chosen": 1.387109398841858, "log_odds_ratio": -0.4287109375, "logits/chosen": -1.2429687976837158, "logits/rejected": -1.073632836341858, "logps/chosen": -0.688281238079071, "logps/rejected": -1.6765625476837158, "loss": 0.8677, "nll_loss": 0.7935546636581421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06883545219898224, "rewards/margins": 0.09877929836511612, "rewards/rejected": -0.16762694716453552, "step": 7140 }, { "epoch": 0.5430036073666223, "grad_norm": 1.7995674508941903, "learning_rate": 9.46099833582532e-07, "log_odds_chosen": 1.4296875, "log_odds_ratio": -0.409423828125, "logits/chosen": -1.1328125, "logits/rejected": -0.9632812738418579, "logps/chosen": -0.7215820550918579, "logps/rejected": -1.741796851158142, "loss": 0.9204, "nll_loss": 0.849609375, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0721435546875, "rewards/margins": 0.10200195014476776, "rewards/rejected": -0.17424316704273224, "step": 7150 }, { "epoch": 0.5437630529713309, "grad_norm": 1.7326576841822012, "learning_rate": 9.45438918494131e-07, "log_odds_chosen": 1.574609398841858, "log_odds_ratio": -0.38349610567092896, "logits/chosen": -1.1779296398162842, "logits/rejected": -1.0041015148162842, "logps/chosen": -0.720507800579071, "logps/rejected": -1.835546851158142, "loss": 0.9055, "nll_loss": 0.875781238079071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07208251953125, "rewards/margins": 0.11139221489429474, "rewards/rejected": -0.18354491889476776, "step": 7160 }, { "epoch": 0.5445224985760395, "grad_norm": 1.7297959243113772, "learning_rate": 9.447793865558291e-07, "log_odds_chosen": 1.5378906726837158, "log_odds_ratio": -0.36669921875, "logits/chosen": -1.2937500476837158, "logits/rejected": -1.042382836341858, "logps/chosen": -0.6732422113418579, "logps/rejected": -1.716406226158142, "loss": 0.9195, "nll_loss": 0.915234386920929, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.06733398139476776, "rewards/margins": 0.10439453274011612, "rewards/rejected": -0.171630859375, "step": 7170 }, { "epoch": 0.545281944180748, "grad_norm": 1.864365133646019, "learning_rate": 9.441212329499659e-07, "log_odds_chosen": 1.504296898841858, "log_odds_ratio": -0.3768066465854645, "logits/chosen": -1.2042968273162842, "logits/rejected": -1.0236327648162842, "logps/chosen": -0.6958984136581421, "logps/rejected": -1.7941405773162842, "loss": 0.8481, "nll_loss": 0.7855468988418579, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06961669772863388, "rewards/margins": 0.10982666164636612, "rewards/rejected": -0.1793212890625, "step": 7180 }, { "epoch": 0.5460413897854566, "grad_norm": 1.5809381521194492, "learning_rate": 9.434644528823399e-07, "log_odds_chosen": 1.37298583984375, "log_odds_ratio": -0.41948240995407104, "logits/chosen": -1.1798827648162842, "logits/rejected": -1.0046875476837158, "logps/chosen": -0.681640625, "logps/rejected": -1.6251952648162842, "loss": 0.8849, "nll_loss": 0.8291015625, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06818847358226776, "rewards/margins": 0.09424743801355362, "rewards/rejected": -0.16240234673023224, "step": 7190 }, { "epoch": 0.5468008353901652, "grad_norm": 1.4777404106200942, "learning_rate": 9.428090415820634e-07, "log_odds_chosen": 1.3937499523162842, "log_odds_ratio": -0.4214843809604645, "logits/chosen": -1.161718726158142, "logits/rejected": -1.007421851158142, "logps/chosen": -0.7001953125, "logps/rejected": -1.69140625, "loss": 0.9079, "nll_loss": 0.8753906488418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06998290866613388, "rewards/margins": 0.09902496635913849, "rewards/rejected": -0.16904297471046448, "step": 7200 }, { "epoch": 0.5475602809948737, "grad_norm": 1.619658754577302, "learning_rate": 9.42154994301416e-07, "log_odds_chosen": 1.3867676258087158, "log_odds_ratio": -0.4249511659145355, "logits/chosen": -1.2488281726837158, "logits/rejected": -1.0812499523162842, "logps/chosen": -0.716992199420929, "logps/rejected": -1.720117211341858, "loss": 0.9097, "nll_loss": 0.9056640863418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07169189304113388, "rewards/margins": 0.10031890869140625, "rewards/rejected": -0.171875, "step": 7210 }, { "epoch": 0.5483197265995823, "grad_norm": 1.6594421951059106, "learning_rate": 9.415023063157008e-07, "log_odds_chosen": 1.7443358898162842, "log_odds_ratio": -0.3497070372104645, "logits/chosen": -1.1554687023162842, "logits/rejected": -0.9994140863418579, "logps/chosen": -0.699414074420929, "logps/rejected": -1.986328125, "loss": 0.8814, "nll_loss": 0.842578113079071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06995849311351776, "rewards/margins": 0.12869873642921448, "rewards/rejected": -0.19870606064796448, "step": 7220 }, { "epoch": 0.5490791722042909, "grad_norm": 1.8351322285293723, "learning_rate": 9.408509729231009e-07, "log_odds_chosen": 1.193945288658142, "log_odds_ratio": -0.44306641817092896, "logits/chosen": -1.1384766101837158, "logits/rejected": -1.018945336341858, "logps/chosen": -0.7314453125, "logps/rejected": -1.535546898841858, "loss": 0.9062, "nll_loss": 0.8584960699081421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07315673679113388, "rewards/margins": 0.08049926906824112, "rewards/rejected": -0.15373535454273224, "step": 7230 }, { "epoch": 0.5498386178089995, "grad_norm": 1.9377646010127565, "learning_rate": 9.402009894445369e-07, "log_odds_chosen": 1.425195336341858, "log_odds_ratio": -0.39794921875, "logits/chosen": -1.2412109375, "logits/rejected": -1.0427734851837158, "logps/chosen": -0.686718761920929, "logps/rejected": -1.686914086341858, "loss": 0.8708, "nll_loss": 0.771679699420929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06865234673023224, "rewards/margins": 0.0999755859375, "rewards/rejected": -0.16860350966453552, "step": 7240 }, { "epoch": 0.550598063413708, "grad_norm": 1.553109519064187, "learning_rate": 9.395523512235255e-07, "log_odds_chosen": 1.6037108898162842, "log_odds_ratio": -0.37890625, "logits/chosen": -1.194921851158142, "logits/rejected": -1.0720703601837158, "logps/chosen": -0.6927734613418579, "logps/rejected": -1.8585937023162842, "loss": 0.8831, "nll_loss": 0.9205077886581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06926269829273224, "rewards/margins": 0.11661376804113388, "rewards/rejected": -0.1859130859375, "step": 7250 }, { "epoch": 0.5513575090184165, "grad_norm": 1.5671135527061977, "learning_rate": 9.389050536260404e-07, "log_odds_chosen": 1.4091796875, "log_odds_ratio": -0.40971678495407104, "logits/chosen": -1.142187476158142, "logits/rejected": -1.001562476158142, "logps/chosen": -0.649218738079071, "logps/rejected": -1.6037108898162842, "loss": 0.897, "nll_loss": 0.8125, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06490478664636612, "rewards/margins": 0.095550537109375, "rewards/rejected": -0.16035155951976776, "step": 7260 }, { "epoch": 0.5521169546231252, "grad_norm": 1.6414703028127753, "learning_rate": 9.382590920403722e-07, "log_odds_chosen": 1.37646484375, "log_odds_ratio": -0.4488281309604645, "logits/chosen": -1.1863281726837158, "logits/rejected": -1.0001952648162842, "logps/chosen": -0.7318359613418579, "logps/rejected": -1.7568359375, "loss": 0.9077, "nll_loss": 0.899218738079071, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07320556789636612, "rewards/margins": 0.10236205905675888, "rewards/rejected": -0.17580565810203552, "step": 7270 }, { "epoch": 0.5528764002278337, "grad_norm": 1.6481748217664094, "learning_rate": 9.376144618769908e-07, "log_odds_chosen": 1.476953148841858, "log_odds_ratio": -0.39702147245407104, "logits/chosen": -1.209570288658142, "logits/rejected": -1.036523461341858, "logps/chosen": -0.7650390863418579, "logps/rejected": -1.834375023841858, "loss": 0.8731, "nll_loss": 0.792285144329071, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07644043117761612, "rewards/margins": 0.10711669921875, "rewards/rejected": -0.18359375, "step": 7280 }, { "epoch": 0.5536358458325422, "grad_norm": 1.586301112412468, "learning_rate": 9.369711585684086e-07, "log_odds_chosen": 1.468237280845642, "log_odds_ratio": -0.41962891817092896, "logits/chosen": -1.236328125, "logits/rejected": -1.0958983898162842, "logps/chosen": -0.692578136920929, "logps/rejected": -1.7199218273162842, "loss": 0.8732, "nll_loss": 0.80712890625, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06928710639476776, "rewards/margins": 0.10288085788488388, "rewards/rejected": -0.17194823920726776, "step": 7290 }, { "epoch": 0.5543952914372509, "grad_norm": 1.8198361440239692, "learning_rate": 9.363291775690445e-07, "log_odds_chosen": 1.002099633216858, "log_odds_ratio": -0.509326159954071, "logits/chosen": -1.147851586341858, "logits/rejected": -0.9896484613418579, "logps/chosen": -0.7548828125, "logps/rejected": -1.4630858898162842, "loss": 0.8868, "nll_loss": 0.875195324420929, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.07546386867761612, "rewards/margins": 0.07082977145910263, "rewards/rejected": -0.14633789658546448, "step": 7300 }, { "epoch": 0.5551547370419594, "grad_norm": 2.0365215371160112, "learning_rate": 9.356885143550886e-07, "log_odds_chosen": 1.5712158679962158, "log_odds_ratio": -0.39404296875, "logits/chosen": -1.180273413658142, "logits/rejected": -0.991992175579071, "logps/chosen": -0.7032226324081421, "logps/rejected": -1.810546875, "loss": 0.8978, "nll_loss": 0.8994140625, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.07034911960363388, "rewards/margins": 0.11060790717601776, "rewards/rejected": -0.18093261122703552, "step": 7310 }, { "epoch": 0.5559141826466679, "grad_norm": 1.7408148412896836, "learning_rate": 9.350491644243688e-07, "log_odds_chosen": 1.1449706554412842, "log_odds_ratio": -0.4581054747104645, "logits/chosen": -1.132226586341858, "logits/rejected": -1.007421851158142, "logps/chosen": -0.731249988079071, "logps/rejected": -1.5525391101837158, "loss": 0.8857, "nll_loss": 0.8857421875, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07320556789636612, "rewards/margins": 0.08211822807788849, "rewards/rejected": -0.15529784560203552, "step": 7320 }, { "epoch": 0.5566736282513765, "grad_norm": 1.583013014994713, "learning_rate": 9.344111232962179e-07, "log_odds_chosen": 1.379492163658142, "log_odds_ratio": -0.45771485567092896, "logits/chosen": -1.202539086341858, "logits/rejected": -1.073828101158142, "logps/chosen": -0.7386718988418579, "logps/rejected": -1.7109375, "loss": 0.898, "nll_loss": 0.854296863079071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07384033501148224, "rewards/margins": 0.09733887016773224, "rewards/rejected": -0.17116698622703552, "step": 7330 }, { "epoch": 0.5574330738560851, "grad_norm": 1.8522038878095224, "learning_rate": 9.337743865113415e-07, "log_odds_chosen": 1.3371093273162842, "log_odds_ratio": -0.42753905057907104, "logits/chosen": -1.182031273841858, "logits/rejected": -0.9839843511581421, "logps/chosen": -0.688281238079071, "logps/rejected": -1.6472656726837158, "loss": 0.8745, "nll_loss": 0.888867199420929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06882324069738388, "rewards/margins": 0.09598388522863388, "rewards/rejected": -0.16469725966453552, "step": 7340 }, { "epoch": 0.5581925194607936, "grad_norm": 2.0110574013367213, "learning_rate": 9.331389496316868e-07, "log_odds_chosen": 1.68505859375, "log_odds_ratio": -0.34375, "logits/chosen": -1.3017578125, "logits/rejected": -1.0714843273162842, "logps/chosen": -0.7001953125, "logps/rejected": -1.907812476158142, "loss": 0.8718, "nll_loss": 0.7574218511581421, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.06995849311351776, "rewards/margins": 0.12086181342601776, "rewards/rejected": -0.19082030653953552, "step": 7350 }, { "epoch": 0.5589519650655022, "grad_norm": 1.5675892021885889, "learning_rate": 9.325048082403138e-07, "log_odds_chosen": 1.3244140148162842, "log_odds_ratio": -0.42314451932907104, "logits/chosen": -1.183203101158142, "logits/rejected": -1.009374976158142, "logps/chosen": -0.6668945550918579, "logps/rejected": -1.596289038658142, "loss": 0.8843, "nll_loss": 0.8173828125, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.066650390625, "rewards/margins": 0.09291992336511612, "rewards/rejected": -0.1595458984375, "step": 7360 }, { "epoch": 0.5597114106702108, "grad_norm": 1.9741248145535868, "learning_rate": 9.318719579412648e-07, "log_odds_chosen": 1.414648413658142, "log_odds_ratio": -0.4264160096645355, "logits/chosen": -1.1359374523162842, "logits/rejected": -0.983593761920929, "logps/chosen": -0.736132800579071, "logps/rejected": -1.7433593273162842, "loss": 0.9011, "nll_loss": 0.9122070074081421, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07370605319738388, "rewards/margins": 0.10074462741613388, "rewards/rejected": -0.17436523735523224, "step": 7370 }, { "epoch": 0.5604708562749193, "grad_norm": 1.6952228983287227, "learning_rate": 9.312403943594374e-07, "log_odds_chosen": 1.405664086341858, "log_odds_ratio": -0.4287109375, "logits/chosen": -1.0876953601837158, "logits/rejected": -0.9242187738418579, "logps/chosen": -0.6888672113418579, "logps/rejected": -1.720312476158142, "loss": 0.8897, "nll_loss": 0.891796886920929, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.06884765625, "rewards/margins": 0.103271484375, "rewards/rejected": -0.17221680283546448, "step": 7380 }, { "epoch": 0.5612303018796279, "grad_norm": 1.7980188417877847, "learning_rate": 9.306101131404582e-07, "log_odds_chosen": 1.409765601158142, "log_odds_ratio": -0.41889649629592896, "logits/chosen": -1.202734351158142, "logits/rejected": -1.069726586341858, "logps/chosen": -0.657519519329071, "logps/rejected": -1.59765625, "loss": 0.8585, "nll_loss": 0.8016601800918579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06569824367761612, "rewards/margins": 0.0941162109375, "rewards/rejected": -0.1597900390625, "step": 7390 }, { "epoch": 0.5619897474843364, "grad_norm": 1.7467779033338529, "learning_rate": 9.299811099505542e-07, "log_odds_chosen": 1.745019555091858, "log_odds_ratio": -0.40400391817092896, "logits/chosen": -1.249414086341858, "logits/rejected": -1.1427733898162842, "logps/chosen": -0.7119140625, "logps/rejected": -2.048046827316284, "loss": 0.8885, "nll_loss": 0.8070312738418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07108154147863388, "rewards/margins": 0.133544921875, "rewards/rejected": -0.2047119140625, "step": 7400 }, { "epoch": 0.562749193089045, "grad_norm": 2.550971649236331, "learning_rate": 9.293533804764305e-07, "log_odds_chosen": 1.562109351158142, "log_odds_ratio": -0.388671875, "logits/chosen": -1.19921875, "logits/rejected": -1.046875, "logps/chosen": -0.6885741949081421, "logps/rejected": -1.864843726158142, "loss": 0.8962, "nll_loss": 0.8373047113418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06882324069738388, "rewards/margins": 0.11746826022863388, "rewards/rejected": -0.18645019829273224, "step": 7410 }, { "epoch": 0.5635086386937536, "grad_norm": 1.5073006863619984, "learning_rate": 9.28726920425144e-07, "log_odds_chosen": 1.289941430091858, "log_odds_ratio": -0.41533201932907104, "logits/chosen": -1.2275390625, "logits/rejected": -1.0353515148162842, "logps/chosen": -0.6727539300918579, "logps/rejected": -1.571874976158142, "loss": 0.8489, "nll_loss": 0.725292980670929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06728515774011612, "rewards/margins": 0.09001465141773224, "rewards/rejected": -0.15732422471046448, "step": 7420 }, { "epoch": 0.5642680842984621, "grad_norm": 1.8361870560882096, "learning_rate": 9.281017255239815e-07, "log_odds_chosen": 1.262353539466858, "log_odds_ratio": -0.4532226622104645, "logits/chosen": -1.1570312976837158, "logits/rejected": -0.9886718988418579, "logps/chosen": -0.703125, "logps/rejected": -1.588476538658142, "loss": 0.8954, "nll_loss": 0.8921874761581421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07037353515625, "rewards/margins": 0.0885467529296875, "rewards/rejected": -0.15891113877296448, "step": 7430 }, { "epoch": 0.5650275299031707, "grad_norm": 1.489675537843064, "learning_rate": 9.274777915203365e-07, "log_odds_chosen": 1.238671898841858, "log_odds_ratio": -0.4280761778354645, "logits/chosen": -1.178125023841858, "logits/rejected": -1.025390625, "logps/chosen": -0.705273449420929, "logps/rejected": -1.5769531726837158, "loss": 0.8759, "nll_loss": 0.8896484375, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.070556640625, "rewards/margins": 0.08713378757238388, "rewards/rejected": -0.15771484375, "step": 7440 }, { "epoch": 0.5657869755078793, "grad_norm": 1.6362185580065927, "learning_rate": 9.268551141815875e-07, "log_odds_chosen": 1.4560546875, "log_odds_ratio": -0.4248046875, "logits/chosen": -1.243554711341858, "logits/rejected": -1.0615234375, "logps/chosen": -0.7509765625, "logps/rejected": -1.790624976158142, "loss": 0.8859, "nll_loss": 0.8321288824081421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07504882663488388, "rewards/margins": 0.10402832180261612, "rewards/rejected": -0.17893067002296448, "step": 7450 }, { "epoch": 0.5665464211125878, "grad_norm": 1.7319363228150781, "learning_rate": 9.262336892949784e-07, "log_odds_chosen": 1.4459960460662842, "log_odds_ratio": -0.3995605409145355, "logits/chosen": -1.238671898841858, "logits/rejected": -1.0498046875, "logps/chosen": -0.7041991949081421, "logps/rejected": -1.734375, "loss": 0.9018, "nll_loss": 0.831347644329071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07042236626148224, "rewards/margins": 0.10310669243335724, "rewards/rejected": -0.17368164658546448, "step": 7460 }, { "epoch": 0.5673058667172963, "grad_norm": 2.3343732854043533, "learning_rate": 9.256135126674977e-07, "log_odds_chosen": 1.66015625, "log_odds_ratio": -0.3629150390625, "logits/chosen": -1.2683594226837158, "logits/rejected": -1.050390601158142, "logps/chosen": -0.708691418170929, "logps/rejected": -1.9128906726837158, "loss": 0.877, "nll_loss": 0.8988281488418579, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.07088623195886612, "rewards/margins": 0.12045898288488388, "rewards/rejected": -0.19130858778953552, "step": 7470 }, { "epoch": 0.568065312322005, "grad_norm": 1.8562345751035942, "learning_rate": 9.249945801257605e-07, "log_odds_chosen": 1.2073242664337158, "log_odds_ratio": -0.514355480670929, "logits/chosen": -1.2380859851837158, "logits/rejected": -1.088476538658142, "logps/chosen": -0.787890613079071, "logps/rejected": -1.6388671398162842, "loss": 0.8914, "nll_loss": 0.9390624761581421, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0787353515625, "rewards/margins": 0.08513794094324112, "rewards/rejected": -0.16398926079273224, "step": 7480 }, { "epoch": 0.5688247579267135, "grad_norm": 2.107420432210027, "learning_rate": 9.243768875158902e-07, "log_odds_chosen": 1.353515625, "log_odds_ratio": -0.4097656309604645, "logits/chosen": -1.1845703125, "logits/rejected": -0.991015613079071, "logps/chosen": -0.7010742425918579, "logps/rejected": -1.647851586341858, "loss": 0.8963, "nll_loss": 0.8759765625, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07008056342601776, "rewards/margins": 0.09459228813648224, "rewards/rejected": -0.16474609076976776, "step": 7490 }, { "epoch": 0.569584203531422, "grad_norm": 1.621362190540628, "learning_rate": 9.23760430703401e-07, "log_odds_chosen": 1.5744140148162842, "log_odds_ratio": -0.42485350370407104, "logits/chosen": -1.183007836341858, "logits/rejected": -1.007421851158142, "logps/chosen": -0.6802734136581421, "logps/rejected": -1.805078148841858, "loss": 0.8641, "nll_loss": 0.771191418170929, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.067962646484375, "rewards/margins": 0.11253662407398224, "rewards/rejected": -0.1805419921875, "step": 7500 }, { "epoch": 0.5703436491361307, "grad_norm": 2.0861581189888017, "learning_rate": 9.231452055730832e-07, "log_odds_chosen": 1.4074218273162842, "log_odds_ratio": -0.4034667909145355, "logits/chosen": -1.1990234851837158, "logits/rejected": -1.0730469226837158, "logps/chosen": -0.6844726800918579, "logps/rejected": -1.6632812023162842, "loss": 0.8779, "nll_loss": 0.7900390625, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06840820610523224, "rewards/margins": 0.09772948920726776, "rewards/rejected": -0.16616210341453552, "step": 7510 }, { "epoch": 0.5711030947408392, "grad_norm": 1.799077414419198, "learning_rate": 9.225312080288851e-07, "log_odds_chosen": 1.474023461341858, "log_odds_ratio": -0.392822265625, "logits/chosen": -1.138671875, "logits/rejected": -0.9765625, "logps/chosen": -0.6748046875, "logps/rejected": -1.7501952648162842, "loss": 0.8687, "nll_loss": 0.82958984375, "rewards/accuracies": 0.78125, "rewards/chosen": -0.0673828125, "rewards/margins": 0.10758056491613388, "rewards/rejected": -0.17502442002296448, "step": 7520 }, { "epoch": 0.5718625403455477, "grad_norm": 1.5997969119096624, "learning_rate": 9.219184339938013e-07, "log_odds_chosen": 1.2322266101837158, "log_odds_ratio": -0.44843751192092896, "logits/chosen": -1.2537109851837158, "logits/rejected": -1.097265601158142, "logps/chosen": -0.6717773675918579, "logps/rejected": -1.5009765625, "loss": 0.879, "nll_loss": 0.849609375, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.067138671875, "rewards/margins": 0.08291320502758026, "rewards/rejected": -0.150146484375, "step": 7530 }, { "epoch": 0.5726219859502563, "grad_norm": 1.813077402590576, "learning_rate": 9.213068794097574e-07, "log_odds_chosen": 1.7003905773162842, "log_odds_ratio": -0.3343749940395355, "logits/chosen": -1.1492187976837158, "logits/rejected": -1.0232422351837158, "logps/chosen": -0.662304699420929, "logps/rejected": -1.8839843273162842, "loss": 0.8909, "nll_loss": 0.8443359136581421, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06621094048023224, "rewards/margins": 0.12208251655101776, "rewards/rejected": -0.18828125298023224, "step": 7540 }, { "epoch": 0.5733814315549649, "grad_norm": 2.3479208101601574, "learning_rate": 9.206965402374975e-07, "log_odds_chosen": 1.453515648841858, "log_odds_ratio": -0.39838868379592896, "logits/chosen": -1.2742187976837158, "logits/rejected": -1.068750023841858, "logps/chosen": -0.6986328363418579, "logps/rejected": -1.689843773841858, "loss": 0.8755, "nll_loss": 0.837695300579071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06986083835363388, "rewards/margins": 0.09909667819738388, "rewards/rejected": -0.16899414360523224, "step": 7550 }, { "epoch": 0.5741408771596734, "grad_norm": 1.8998265907475624, "learning_rate": 9.200874124564723e-07, "log_odds_chosen": 1.350244164466858, "log_odds_ratio": -0.4532226622104645, "logits/chosen": -1.2517578601837158, "logits/rejected": -1.026757836341858, "logps/chosen": -0.711230456829071, "logps/rejected": -1.668359398841858, "loss": 0.8674, "nll_loss": 0.8179687261581421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07108154147863388, "rewards/margins": 0.09564514458179474, "rewards/rejected": -0.1668701171875, "step": 7560 }, { "epoch": 0.574900322764382, "grad_norm": 1.4553521999890706, "learning_rate": 9.194794920647274e-07, "log_odds_chosen": 1.3986327648162842, "log_odds_ratio": -0.40327149629592896, "logits/chosen": -1.2185547351837158, "logits/rejected": -1.0041015148162842, "logps/chosen": -0.775195300579071, "logps/rejected": -1.8056640625, "loss": 0.8839, "nll_loss": 0.7953125238418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07757568359375, "rewards/margins": 0.10302734375, "rewards/rejected": -0.18068847060203552, "step": 7570 }, { "epoch": 0.5756597683690906, "grad_norm": 1.6702386881260476, "learning_rate": 9.188727750787932e-07, "log_odds_chosen": 1.418188452720642, "log_odds_ratio": -0.4087158143520355, "logits/chosen": -1.25, "logits/rejected": -1.049414038658142, "logps/chosen": -0.767382800579071, "logps/rejected": -1.76953125, "loss": 0.8836, "nll_loss": 0.863476574420929, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07679443061351776, "rewards/margins": 0.10029907524585724, "rewards/rejected": -0.17705078423023224, "step": 7580 }, { "epoch": 0.5764192139737991, "grad_norm": 2.1108998839842252, "learning_rate": 9.182672575335757e-07, "log_odds_chosen": 1.3107421398162842, "log_odds_ratio": -0.46044921875, "logits/chosen": -1.1259765625, "logits/rejected": -0.912890613079071, "logps/chosen": -0.683789074420929, "logps/rejected": -1.593359351158142, "loss": 0.8997, "nll_loss": 0.803515613079071, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06833495944738388, "rewards/margins": 0.09104003757238388, "rewards/rejected": -0.15937499701976776, "step": 7590 }, { "epoch": 0.5771786595785077, "grad_norm": 1.890048503463801, "learning_rate": 9.176629354822469e-07, "log_odds_chosen": 1.46826171875, "log_odds_ratio": -0.42707520723342896, "logits/chosen": -1.3017578125, "logits/rejected": -1.0712890625, "logps/chosen": -0.6908203363418579, "logps/rejected": -1.7109375, "loss": 0.8724, "nll_loss": 0.8169921636581421, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06905517727136612, "rewards/margins": 0.10203857719898224, "rewards/rejected": -0.17109374701976776, "step": 7600 }, { "epoch": 0.5779381051832162, "grad_norm": 1.7023418455914054, "learning_rate": 9.170598049961371e-07, "log_odds_chosen": 1.3718750476837158, "log_odds_ratio": -0.3922363221645355, "logits/chosen": -1.240625023841858, "logits/rejected": -1.0363280773162842, "logps/chosen": -0.699902355670929, "logps/rejected": -1.652734398841858, "loss": 0.8703, "nll_loss": 0.8246093988418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06997070461511612, "rewards/margins": 0.09521484375, "rewards/rejected": -0.16518554091453552, "step": 7610 }, { "epoch": 0.5786975507879248, "grad_norm": 1.7178601649154002, "learning_rate": 9.164578621646276e-07, "log_odds_chosen": 1.5498046875, "log_odds_ratio": -0.373779296875, "logits/chosen": -1.1572265625, "logits/rejected": -1.017187476158142, "logps/chosen": -0.68994140625, "logps/rejected": -1.7999999523162842, "loss": 0.8547, "nll_loss": 0.7962890863418579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06900634616613388, "rewards/margins": 0.11098022758960724, "rewards/rejected": -0.18010254204273224, "step": 7620 }, { "epoch": 0.5794569963926334, "grad_norm": 1.9538125426245612, "learning_rate": 9.15857103095044e-07, "log_odds_chosen": 1.402197241783142, "log_odds_ratio": -0.4303222596645355, "logits/chosen": -1.1847655773162842, "logits/rejected": -1.0548827648162842, "logps/chosen": -0.739453136920929, "logps/rejected": -1.69921875, "loss": 0.8642, "nll_loss": 0.815722644329071, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07388915866613388, "rewards/margins": 0.09601440280675888, "rewards/rejected": -0.16994628310203552, "step": 7630 }, { "epoch": 0.5802164419973419, "grad_norm": 2.2729791084691002, "learning_rate": 9.15257523912551e-07, "log_odds_chosen": 1.125390648841858, "log_odds_ratio": -0.44902342557907104, "logits/chosen": -1.284570336341858, "logits/rejected": -1.162500023841858, "logps/chosen": -0.65234375, "logps/rejected": -1.3914062976837158, "loss": 0.8604, "nll_loss": 0.74658203125, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.0653076171875, "rewards/margins": 0.07390137016773224, "rewards/rejected": -0.13916015625, "step": 7640 }, { "epoch": 0.5809758876020505, "grad_norm": 1.5284032024536216, "learning_rate": 9.146591207600472e-07, "log_odds_chosen": 1.1044921875, "log_odds_ratio": -0.48383790254592896, "logits/chosen": -1.266015648841858, "logits/rejected": -1.1242187023162842, "logps/chosen": -0.726855456829071, "logps/rejected": -1.5085937976837158, "loss": 0.8886, "nll_loss": 0.8667968511581421, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07269287109375, "rewards/margins": 0.0782470703125, "rewards/rejected": -0.15080566704273224, "step": 7650 }, { "epoch": 0.5817353332067591, "grad_norm": 1.584596971534271, "learning_rate": 9.140618897980601e-07, "log_odds_chosen": 1.632226586341858, "log_odds_ratio": -0.3453125059604645, "logits/chosen": -1.2156250476837158, "logits/rejected": -1.015234351158142, "logps/chosen": -0.6666015386581421, "logps/rejected": -1.785546898841858, "loss": 0.8911, "nll_loss": 0.834765613079071, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06671142578125, "rewards/margins": 0.11176757514476776, "rewards/rejected": -0.17851562798023224, "step": 7660 }, { "epoch": 0.5824947788114676, "grad_norm": 1.6817933833609267, "learning_rate": 9.134658272046442e-07, "log_odds_chosen": 1.273339867591858, "log_odds_ratio": -0.44013673067092896, "logits/chosen": -1.208593726158142, "logits/rejected": -1.056640625, "logps/chosen": -0.6905273199081421, "logps/rejected": -1.5994141101837158, "loss": 0.8939, "nll_loss": 0.9009765386581421, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.069091796875, "rewards/margins": 0.09099273383617401, "rewards/rejected": -0.159912109375, "step": 7670 }, { "epoch": 0.5832542244161761, "grad_norm": 1.8502842758387703, "learning_rate": 9.128709291752768e-07, "log_odds_chosen": 1.5333983898162842, "log_odds_ratio": -0.40888673067092896, "logits/chosen": -1.179101586341858, "logits/rejected": -0.988085925579071, "logps/chosen": -0.6385742425918579, "logps/rejected": -1.695703148841858, "loss": 0.8809, "nll_loss": 0.804882824420929, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06381835788488388, "rewards/margins": 0.105743408203125, "rewards/rejected": -0.1695556640625, "step": 7680 }, { "epoch": 0.5840136700208848, "grad_norm": 1.706704809935507, "learning_rate": 9.122771919227568e-07, "log_odds_chosen": 1.3220703601837158, "log_odds_ratio": -0.424560546875, "logits/chosen": -1.220312476158142, "logits/rejected": -1.024023413658142, "logps/chosen": -0.6375976800918579, "logps/rejected": -1.5439453125, "loss": 0.8807, "nll_loss": 0.820117175579071, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06375732272863388, "rewards/margins": 0.09062500298023224, "rewards/rejected": -0.15449218451976776, "step": 7690 }, { "epoch": 0.5847731156255933, "grad_norm": 1.7428294709192573, "learning_rate": 9.116846116771035e-07, "log_odds_chosen": 1.556640625, "log_odds_ratio": -0.35810548067092896, "logits/chosen": -1.2791016101837158, "logits/rejected": -1.029296875, "logps/chosen": -0.650390625, "logps/rejected": -1.743749976158142, "loss": 0.9029, "nll_loss": 0.901562511920929, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.06503906100988388, "rewards/margins": 0.10928954929113388, "rewards/rejected": -0.17436523735523224, "step": 7700 }, { "epoch": 0.5855325612303018, "grad_norm": 1.8504413325777838, "learning_rate": 9.110931846854553e-07, "log_odds_chosen": 1.4050781726837158, "log_odds_ratio": -0.43266600370407104, "logits/chosen": -1.1728515625, "logits/rejected": -1.024023413658142, "logps/chosen": -0.70703125, "logps/rejected": -1.7107422351837158, "loss": 0.8847, "nll_loss": 0.869335949420929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07071533054113388, "rewards/margins": 0.10038147121667862, "rewards/rejected": -0.17111817002296448, "step": 7710 }, { "epoch": 0.5862920068350105, "grad_norm": 1.9004993971796598, "learning_rate": 9.105029072119708e-07, "log_odds_chosen": 1.5705077648162842, "log_odds_ratio": -0.3763671815395355, "logits/chosen": -1.296875, "logits/rejected": -1.0568358898162842, "logps/chosen": -0.6805664300918579, "logps/rejected": -1.8289062976837158, "loss": 0.8701, "nll_loss": 0.8060547113418579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06812743842601776, "rewards/margins": 0.11491699516773224, "rewards/rejected": -0.182861328125, "step": 7720 }, { "epoch": 0.587051452439719, "grad_norm": 1.6506230573900638, "learning_rate": 9.099137755377291e-07, "log_odds_chosen": 1.3693358898162842, "log_odds_ratio": -0.4413085877895355, "logits/chosen": -1.259374976158142, "logits/rejected": -1.0537109375, "logps/chosen": -0.6917968988418579, "logps/rejected": -1.6943359375, "loss": 0.8613, "nll_loss": 0.779101550579071, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.06914062798023224, "rewards/margins": 0.10033569484949112, "rewards/rejected": -0.1695556640625, "step": 7730 }, { "epoch": 0.5878108980444275, "grad_norm": 1.4516645068050382, "learning_rate": 9.093257859606311e-07, "log_odds_chosen": 1.2443358898162842, "log_odds_ratio": -0.43994140625, "logits/chosen": -1.2130858898162842, "logits/rejected": -1.121484398841858, "logps/chosen": -0.6693359613418579, "logps/rejected": -1.52734375, "loss": 0.8967, "nll_loss": 0.8462890386581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06696777045726776, "rewards/margins": 0.08585204929113388, "rewards/rejected": -0.15288086235523224, "step": 7740 }, { "epoch": 0.5885703436491362, "grad_norm": 1.8712885130428696, "learning_rate": 9.087389347953037e-07, "log_odds_chosen": 1.355078101158142, "log_odds_ratio": -0.4151367247104645, "logits/chosen": -1.2160155773162842, "logits/rejected": -1.0011718273162842, "logps/chosen": -0.70751953125, "logps/rejected": -1.6671874523162842, "loss": 0.8813, "nll_loss": 0.871874988079071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07077636569738388, "rewards/margins": 0.09595946967601776, "rewards/rejected": -0.1668701171875, "step": 7750 }, { "epoch": 0.5893297892538447, "grad_norm": 2.123626081642409, "learning_rate": 9.081532183729995e-07, "log_odds_chosen": 1.551660180091858, "log_odds_ratio": -0.39091795682907104, "logits/chosen": -1.224023461341858, "logits/rejected": -1.026953101158142, "logps/chosen": -0.7354491949081421, "logps/rejected": -1.854101538658142, "loss": 0.8754, "nll_loss": 0.86083984375, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07353515923023224, "rewards/margins": 0.11174926906824112, "rewards/rejected": -0.18535156548023224, "step": 7760 }, { "epoch": 0.5900892348585532, "grad_norm": 1.9406631900672282, "learning_rate": 9.075686330415037e-07, "log_odds_chosen": 1.5167968273162842, "log_odds_ratio": -0.38300782442092896, "logits/chosen": -1.3515625, "logits/rejected": -1.113867163658142, "logps/chosen": -0.7030273675918579, "logps/rejected": -1.777734398841858, "loss": 0.8781, "nll_loss": 0.800585925579071, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07032470405101776, "rewards/margins": 0.10723876953125, "rewards/rejected": -0.177490234375, "step": 7770 }, { "epoch": 0.5908486804632618, "grad_norm": 2.3286831945289026, "learning_rate": 9.069851751650364e-07, "log_odds_chosen": 1.655029296875, "log_odds_ratio": -0.36674803495407104, "logits/chosen": -1.2453124523162842, "logits/rejected": -1.1066405773162842, "logps/chosen": -0.694628894329071, "logps/rejected": -1.91796875, "loss": 0.8933, "nll_loss": 0.811718761920929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06940917670726776, "rewards/margins": 0.1223907470703125, "rewards/rejected": -0.19179686903953552, "step": 7780 }, { "epoch": 0.5916081260679704, "grad_norm": 2.069325209880468, "learning_rate": 9.064028411241582e-07, "log_odds_chosen": 1.391699194908142, "log_odds_ratio": -0.40986329317092896, "logits/chosen": -1.1121094226837158, "logits/rejected": -1.008203148841858, "logps/chosen": -0.7230468988418579, "logps/rejected": -1.679296851158142, "loss": 0.8769, "nll_loss": 0.857226550579071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07232666015625, "rewards/margins": 0.095703125, "rewards/rejected": -0.16801758110523224, "step": 7790 }, { "epoch": 0.592367571672679, "grad_norm": 1.7473974842881335, "learning_rate": 9.058216273156764e-07, "log_odds_chosen": 1.2786133289337158, "log_odds_ratio": -0.4146484434604645, "logits/chosen": -1.1984374523162842, "logits/rejected": -1.034570336341858, "logps/chosen": -0.6849609613418579, "logps/rejected": -1.578125, "loss": 0.867, "nll_loss": 0.894726574420929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06853027641773224, "rewards/margins": 0.08934936672449112, "rewards/rejected": -0.1578369140625, "step": 7800 }, { "epoch": 0.5931270172773875, "grad_norm": 1.5634870059933736, "learning_rate": 9.052415301525511e-07, "log_odds_chosen": 1.4835937023162842, "log_odds_ratio": -0.369140625, "logits/chosen": -1.3738281726837158, "logits/rejected": -1.086523413658142, "logps/chosen": -0.7484375238418579, "logps/rejected": -1.827734351158142, "loss": 0.8768, "nll_loss": 0.8089843988418579, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.07481689751148224, "rewards/margins": 0.10798339545726776, "rewards/rejected": -0.18269042670726776, "step": 7810 }, { "epoch": 0.5938864628820961, "grad_norm": 2.2191516528843414, "learning_rate": 9.046625460638012e-07, "log_odds_chosen": 1.200952172279358, "log_odds_ratio": -0.48735350370407104, "logits/chosen": -1.185937523841858, "logits/rejected": -1.0515625476837158, "logps/chosen": -0.7520507574081421, "logps/rejected": -1.621484398841858, "loss": 0.8738, "nll_loss": 0.8949218988418579, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07523193210363388, "rewards/margins": 0.08685149997472763, "rewards/rejected": -0.162109375, "step": 7820 }, { "epoch": 0.5946459084868047, "grad_norm": 2.938899904989788, "learning_rate": 9.040846714944138e-07, "log_odds_chosen": 1.163476586341858, "log_odds_ratio": -0.4588378965854645, "logits/chosen": -1.2087891101837158, "logits/rejected": -1.04296875, "logps/chosen": -0.6915038824081421, "logps/rejected": -1.5021483898162842, "loss": 0.8896, "nll_loss": 0.8697265386581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06914062798023224, "rewards/margins": 0.08103027194738388, "rewards/rejected": -0.15017089247703552, "step": 7830 }, { "epoch": 0.5954053540915132, "grad_norm": 2.4115307561950288, "learning_rate": 9.035079029052513e-07, "log_odds_chosen": 1.2185547351837158, "log_odds_ratio": -0.4349609315395355, "logits/chosen": -1.1652343273162842, "logits/rejected": -1.0451171398162842, "logps/chosen": -0.757617175579071, "logps/rejected": -1.607812523841858, "loss": 0.863, "nll_loss": 0.8145507574081421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07572021335363388, "rewards/margins": 0.08507690578699112, "rewards/rejected": -0.16069336235523224, "step": 7840 }, { "epoch": 0.5961647996962217, "grad_norm": 2.988420051167976, "learning_rate": 9.029322367729605e-07, "log_odds_chosen": 1.4744141101837158, "log_odds_ratio": -0.38481444120407104, "logits/chosen": -1.3330078125, "logits/rejected": -1.0671875476837158, "logps/chosen": -0.72998046875, "logps/rejected": -1.774999976158142, "loss": 0.8832, "nll_loss": 0.801464855670929, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.07301025092601776, "rewards/margins": 0.10447387397289276, "rewards/rejected": -0.17744140326976776, "step": 7850 }, { "epoch": 0.5969242453009304, "grad_norm": 2.1011354580663286, "learning_rate": 9.02357669589883e-07, "log_odds_chosen": 1.4541015625, "log_odds_ratio": -0.3785644471645355, "logits/chosen": -1.284570336341858, "logits/rejected": -1.0927734375, "logps/chosen": -0.6988281011581421, "logps/rejected": -1.7101562023162842, "loss": 0.8684, "nll_loss": 0.8324218988418579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06989745795726776, "rewards/margins": 0.10103759914636612, "rewards/rejected": -0.17092284560203552, "step": 7860 }, { "epoch": 0.5976836909056389, "grad_norm": 1.6428229960728074, "learning_rate": 9.017841978639643e-07, "log_odds_chosen": 1.38330078125, "log_odds_ratio": -0.4200683534145355, "logits/chosen": -1.263281226158142, "logits/rejected": -1.111718773841858, "logps/chosen": -0.732226550579071, "logps/rejected": -1.716796875, "loss": 0.8761, "nll_loss": 0.925000011920929, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07318115234375, "rewards/margins": 0.09846649318933487, "rewards/rejected": -0.17165526747703552, "step": 7870 }, { "epoch": 0.5984431365103474, "grad_norm": 2.0982937084228075, "learning_rate": 9.012118181186658e-07, "log_odds_chosen": 1.6896483898162842, "log_odds_ratio": -0.345947265625, "logits/chosen": -1.2716796398162842, "logits/rejected": -1.0927734375, "logps/chosen": -0.695605456829071, "logps/rejected": -1.9392578601837158, "loss": 0.8824, "nll_loss": 0.8794921636581421, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.06961669772863388, "rewards/margins": 0.12427978217601776, "rewards/rejected": -0.19384765625, "step": 7880 }, { "epoch": 0.5992025821150561, "grad_norm": 1.7192530848227687, "learning_rate": 9.00640526892875e-07, "log_odds_chosen": 1.3336913585662842, "log_odds_ratio": -0.44365233182907104, "logits/chosen": -1.2384765148162842, "logits/rejected": -1.0964844226837158, "logps/chosen": -0.675585925579071, "logps/rejected": -1.620703101158142, "loss": 0.8592, "nll_loss": 0.765820324420929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06754150241613388, "rewards/margins": 0.09453125298023224, "rewards/rejected": -0.16215820610523224, "step": 7890 }, { "epoch": 0.5999620277197646, "grad_norm": 1.7175868090781943, "learning_rate": 9.000703207408191e-07, "log_odds_chosen": 1.658789038658142, "log_odds_ratio": -0.32719725370407104, "logits/chosen": -1.2257812023162842, "logits/rejected": -1.00390625, "logps/chosen": -0.7525390386581421, "logps/rejected": -2.003124952316284, "loss": 0.9036, "nll_loss": 0.909960925579071, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.07529296725988388, "rewards/margins": 0.1248779296875, "rewards/rejected": -0.20014648139476776, "step": 7900 }, { "epoch": 0.6007214733244731, "grad_norm": 1.9836697002329111, "learning_rate": 8.995011962319761e-07, "log_odds_chosen": 1.2804687023162842, "log_odds_ratio": -0.4490722715854645, "logits/chosen": -1.2062499523162842, "logits/rejected": -1.028906226158142, "logps/chosen": -0.7455078363418579, "logps/rejected": -1.6632812023162842, "loss": 0.8599, "nll_loss": 0.851757824420929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07452392578125, "rewards/margins": 0.09190674126148224, "rewards/rejected": -0.1663818359375, "step": 7910 }, { "epoch": 0.6014809189291817, "grad_norm": 1.7758031092125124, "learning_rate": 8.989331499509894e-07, "log_odds_chosen": 1.56982421875, "log_odds_ratio": -0.445068359375, "logits/chosen": -1.184179663658142, "logits/rejected": -0.993945300579071, "logps/chosen": -0.715039074420929, "logps/rejected": -1.898046851158142, "loss": 0.865, "nll_loss": 0.8001953363418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.071533203125, "rewards/margins": 0.11832275241613388, "rewards/rejected": -0.18984374403953552, "step": 7920 }, { "epoch": 0.6022403645338903, "grad_norm": 1.7810987008446517, "learning_rate": 8.983661784975812e-07, "log_odds_chosen": 1.2112305164337158, "log_odds_ratio": -0.4402832090854645, "logits/chosen": -1.1496093273162842, "logits/rejected": -0.9876953363418579, "logps/chosen": -0.698046863079071, "logps/rejected": -1.5324218273162842, "loss": 0.8855, "nll_loss": 0.8490234613418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06979980319738388, "rewards/margins": 0.08347167819738388, "rewards/rejected": -0.1533203125, "step": 7930 }, { "epoch": 0.6029998101385988, "grad_norm": 1.8827105680277385, "learning_rate": 8.97800278486467e-07, "log_odds_chosen": 1.402441382408142, "log_odds_ratio": -0.3963378965854645, "logits/chosen": -1.16796875, "logits/rejected": -1.055078148841858, "logps/chosen": -0.699999988079071, "logps/rejected": -1.6953125, "loss": 0.856, "nll_loss": 0.8707031011581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06998290866613388, "rewards/margins": 0.09955444186925888, "rewards/rejected": -0.16960449516773224, "step": 7940 }, { "epoch": 0.6037592557433074, "grad_norm": 1.7064051621837593, "learning_rate": 8.972354465472708e-07, "log_odds_chosen": 1.436914086341858, "log_odds_ratio": -0.41718751192092896, "logits/chosen": -1.1884765625, "logits/rejected": -1.052734375, "logps/chosen": -0.730761706829071, "logps/rejected": -1.779687523841858, "loss": 0.8474, "nll_loss": 0.8490234613418579, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0731201171875, "rewards/margins": 0.10502929985523224, "rewards/rejected": -0.1781005859375, "step": 7950 }, { "epoch": 0.604518701348016, "grad_norm": 1.8346064776009128, "learning_rate": 8.966716793244405e-07, "log_odds_chosen": 1.407470703125, "log_odds_ratio": -0.41679686307907104, "logits/chosen": -1.1828124523162842, "logits/rejected": -1.059179663658142, "logps/chosen": -0.662304699420929, "logps/rejected": -1.6466796398162842, "loss": 0.9103, "nll_loss": 0.845703125, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06621094048023224, "rewards/margins": 0.09853973239660263, "rewards/rejected": -0.16486816108226776, "step": 7960 }, { "epoch": 0.6052781469527245, "grad_norm": 1.6195597576472769, "learning_rate": 8.96108973477165e-07, "log_odds_chosen": 1.462304711341858, "log_odds_ratio": -0.4176269471645355, "logits/chosen": -1.2177734375, "logits/rejected": -0.9830077886581421, "logps/chosen": -0.690234363079071, "logps/rejected": -1.769140601158142, "loss": 0.8981, "nll_loss": 0.8685547113418579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06906738132238388, "rewards/margins": 0.10788574069738388, "rewards/rejected": -0.17690429091453552, "step": 7970 }, { "epoch": 0.6060375925574331, "grad_norm": 1.7613847599669266, "learning_rate": 8.955473256792899e-07, "log_odds_chosen": 1.607421875, "log_odds_ratio": -0.3736328184604645, "logits/chosen": -1.234960913658142, "logits/rejected": -1.0509765148162842, "logps/chosen": -0.675585925579071, "logps/rejected": -1.802343726158142, "loss": 0.8532, "nll_loss": 0.8184570074081421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06760253757238388, "rewards/margins": 0.11268921196460724, "rewards/rejected": -0.18034668266773224, "step": 7980 }, { "epoch": 0.6067970381621416, "grad_norm": 1.5238927277088803, "learning_rate": 8.949867326192358e-07, "log_odds_chosen": 1.5929687023162842, "log_odds_ratio": -0.4090332090854645, "logits/chosen": -1.126367211341858, "logits/rejected": -0.9662109613418579, "logps/chosen": -0.728320300579071, "logps/rejected": -1.853124976158142, "loss": 0.8759, "nll_loss": 0.7979491949081421, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07285156100988388, "rewards/margins": 0.11247558891773224, "rewards/rejected": -0.18515625596046448, "step": 7990 }, { "epoch": 0.6075564837668502, "grad_norm": 1.5278067388702767, "learning_rate": 8.944271909999158e-07, "log_odds_chosen": 1.5256836414337158, "log_odds_ratio": -0.398193359375, "logits/chosen": -1.239648461341858, "logits/rejected": -1.0353515148162842, "logps/chosen": -0.707226574420929, "logps/rejected": -1.813085913658142, "loss": 0.865, "nll_loss": 0.8184570074081421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07069091498851776, "rewards/margins": 0.11065063625574112, "rewards/rejected": -0.18125000596046448, "step": 8000 }, { "epoch": 0.6083159293715588, "grad_norm": 2.3541460090894306, "learning_rate": 8.938686975386545e-07, "log_odds_chosen": 1.5163085460662842, "log_odds_ratio": -0.4124999940395355, "logits/chosen": -1.2724609375, "logits/rejected": -1.0330078601837158, "logps/chosen": -0.725390613079071, "logps/rejected": -1.831640601158142, "loss": 0.8821, "nll_loss": 0.8656250238418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07253418117761612, "rewards/margins": 0.11063232272863388, "rewards/rejected": -0.18310546875, "step": 8010 }, { "epoch": 0.6090753749762673, "grad_norm": 1.9803321836764955, "learning_rate": 8.933112489671067e-07, "log_odds_chosen": 1.57470703125, "log_odds_ratio": -0.38056641817092896, "logits/chosen": -1.2335937023162842, "logits/rejected": -1.0203125476837158, "logps/chosen": -0.674511730670929, "logps/rejected": -1.81640625, "loss": 0.869, "nll_loss": 0.816210925579071, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.0675048828125, "rewards/margins": 0.11417236179113388, "rewards/rejected": -0.18166503310203552, "step": 8020 }, { "epoch": 0.6098348205809759, "grad_norm": 2.0677420372530997, "learning_rate": 8.927548420311771e-07, "log_odds_chosen": 1.771484375, "log_odds_ratio": -0.36713868379592896, "logits/chosen": -1.156640648841858, "logits/rejected": -1.006445288658142, "logps/chosen": -0.705273449420929, "logps/rejected": -2.0035157203674316, "loss": 0.85, "nll_loss": 0.8331054449081421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07056884467601776, "rewards/margins": 0.12977294623851776, "rewards/rejected": -0.20021972060203552, "step": 8030 }, { "epoch": 0.6105942661856845, "grad_norm": 2.2106281326021238, "learning_rate": 8.921994734909409e-07, "log_odds_chosen": 1.694921851158142, "log_odds_ratio": -0.3953613340854645, "logits/chosen": -1.264257788658142, "logits/rejected": -1.097265601158142, "logps/chosen": -0.7392578125, "logps/rejected": -2.00390625, "loss": 0.8938, "nll_loss": 0.844531238079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07396240532398224, "rewards/margins": 0.12644043564796448, "rewards/rejected": -0.20039062201976776, "step": 8040 }, { "epoch": 0.611353711790393, "grad_norm": 2.0471669805884276, "learning_rate": 8.916451401205645e-07, "log_odds_chosen": 1.533300757408142, "log_odds_ratio": -0.3993164002895355, "logits/chosen": -1.2080078125, "logits/rejected": -1.049414038658142, "logps/chosen": -0.67724609375, "logps/rejected": -1.7666015625, "loss": 0.8962, "nll_loss": 0.876757800579071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06768798828125, "rewards/margins": 0.10896606743335724, "rewards/rejected": -0.17678222060203552, "step": 8050 }, { "epoch": 0.6121131573951015, "grad_norm": 1.8357906307160146, "learning_rate": 8.91091838708226e-07, "log_odds_chosen": 1.6740233898162842, "log_odds_ratio": -0.34814453125, "logits/chosen": -1.259374976158142, "logits/rejected": -1.0478515625, "logps/chosen": -0.687207043170929, "logps/rejected": -1.8898437023162842, "loss": 0.8928, "nll_loss": 0.808789074420929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06871338188648224, "rewards/margins": 0.12015380710363388, "rewards/rejected": -0.18879394233226776, "step": 8060 }, { "epoch": 0.6128726029998102, "grad_norm": 1.9627158625601706, "learning_rate": 8.905395660560378e-07, "log_odds_chosen": 1.4001953601837158, "log_odds_ratio": -0.4078613221645355, "logits/chosen": -1.2880859375, "logits/rejected": -1.0554687976837158, "logps/chosen": -0.7115234136581421, "logps/rejected": -1.7199218273162842, "loss": 0.8504, "nll_loss": 0.832714855670929, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.0711669921875, "rewards/margins": 0.10078124701976776, "rewards/rejected": -0.17189940810203552, "step": 8070 }, { "epoch": 0.6136320486045187, "grad_norm": 1.726838911800311, "learning_rate": 8.899883189799695e-07, "log_odds_chosen": 1.4314453601837158, "log_odds_ratio": -0.39057618379592896, "logits/chosen": -1.2316405773162842, "logits/rejected": -1.0974609851837158, "logps/chosen": -0.697265625, "logps/rejected": -1.682031273841858, "loss": 0.8713, "nll_loss": 0.8709961175918579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06975097954273224, "rewards/margins": 0.098297119140625, "rewards/rejected": -0.16818848252296448, "step": 8080 }, { "epoch": 0.6143914942092272, "grad_norm": 1.5714827259012532, "learning_rate": 8.894380943097694e-07, "log_odds_chosen": 1.5368163585662842, "log_odds_ratio": -0.39116209745407104, "logits/chosen": -1.2830078601837158, "logits/rejected": -1.050390601158142, "logps/chosen": -0.6767578125, "logps/rejected": -1.774023413658142, "loss": 0.9046, "nll_loss": 0.826953113079071, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06772460788488388, "rewards/margins": 0.10973358154296875, "rewards/rejected": -0.17751464247703552, "step": 8090 }, { "epoch": 0.6151509398139359, "grad_norm": 1.5956161918673752, "learning_rate": 8.888888888888888e-07, "log_odds_chosen": 1.3312499523162842, "log_odds_ratio": -0.473876953125, "logits/chosen": -1.2634766101837158, "logits/rejected": -1.116601586341858, "logps/chosen": -0.759765625, "logps/rejected": -1.784765601158142, "loss": 0.8635, "nll_loss": 0.94140625, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07601318508386612, "rewards/margins": 0.10225830227136612, "rewards/rejected": -0.17839355766773224, "step": 8100 }, { "epoch": 0.6159103854186444, "grad_norm": 1.679538605513346, "learning_rate": 8.883406995744061e-07, "log_odds_chosen": 1.464257836341858, "log_odds_ratio": -0.3727783262729645, "logits/chosen": -1.2830078601837158, "logits/rejected": -1.107421875, "logps/chosen": -0.6592773199081421, "logps/rejected": -1.6941406726837158, "loss": 0.8606, "nll_loss": 0.8255859613418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06593017280101776, "rewards/margins": 0.10358581691980362, "rewards/rejected": -0.16940918564796448, "step": 8110 }, { "epoch": 0.6166698310233529, "grad_norm": 1.6157252193774412, "learning_rate": 8.877935232369506e-07, "log_odds_chosen": 1.591796875, "log_odds_ratio": -0.4097656309604645, "logits/chosen": -1.265234351158142, "logits/rejected": -1.1474609375, "logps/chosen": -0.71630859375, "logps/rejected": -1.8800780773162842, "loss": 0.8846, "nll_loss": 0.8603515625, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07163085788488388, "rewards/margins": 0.11630859225988388, "rewards/rejected": -0.18801268935203552, "step": 8120 }, { "epoch": 0.6174292766280615, "grad_norm": 1.9256915134198014, "learning_rate": 8.872473567606276e-07, "log_odds_chosen": 1.6082031726837158, "log_odds_ratio": -0.40473634004592896, "logits/chosen": -1.060546875, "logits/rejected": -0.961718738079071, "logps/chosen": -0.7010742425918579, "logps/rejected": -1.8857421875, "loss": 0.8874, "nll_loss": 0.8626953363418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07009277492761612, "rewards/margins": 0.11864013969898224, "rewards/rejected": -0.18862304091453552, "step": 8130 }, { "epoch": 0.6181887222327701, "grad_norm": 1.8810142418363807, "learning_rate": 8.867021970429453e-07, "log_odds_chosen": 1.6630859375, "log_odds_ratio": -0.3458496034145355, "logits/chosen": -1.2287108898162842, "logits/rejected": -1.0390625, "logps/chosen": -0.664843738079071, "logps/rejected": -1.85546875, "loss": 0.8378, "nll_loss": 0.8023437261581421, "rewards/accuracies": 0.84375, "rewards/chosen": -0.06646728515625, "rewards/margins": 0.11900635063648224, "rewards/rejected": -0.18549804389476776, "step": 8140 }, { "epoch": 0.6189481678374786, "grad_norm": 1.669076356773238, "learning_rate": 8.86158040994738e-07, "log_odds_chosen": 1.6027343273162842, "log_odds_ratio": -0.4222168028354645, "logits/chosen": -1.1271483898162842, "logits/rejected": -1.0027344226837158, "logps/chosen": -0.627148449420929, "logps/rejected": -1.771875023841858, "loss": 0.873, "nll_loss": 0.7890625, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06278076022863388, "rewards/margins": 0.11445312201976776, "rewards/rejected": -0.1771240234375, "step": 8150 }, { "epoch": 0.6197076134421872, "grad_norm": 1.7932276515182175, "learning_rate": 8.856148855400954e-07, "log_odds_chosen": 1.763281226158142, "log_odds_ratio": -0.35126954317092896, "logits/chosen": -1.207617163658142, "logits/rejected": -1.0496094226837158, "logps/chosen": -0.708789050579071, "logps/rejected": -1.9835937023162842, "loss": 0.8762, "nll_loss": 0.8056640625, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.07081298530101776, "rewards/margins": 0.12744140625, "rewards/rejected": -0.1982421875, "step": 8160 }, { "epoch": 0.6204670590468958, "grad_norm": 1.9230370898476254, "learning_rate": 8.850727276162873e-07, "log_odds_chosen": 1.599218726158142, "log_odds_ratio": -0.34697264432907104, "logits/chosen": -1.2490234375, "logits/rejected": -1.060937523841858, "logps/chosen": -0.6700195074081421, "logps/rejected": -1.7902343273162842, "loss": 0.8666, "nll_loss": 0.8299804925918579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06696777045726776, "rewards/margins": 0.1119384765625, "rewards/rejected": -0.17902831733226776, "step": 8170 }, { "epoch": 0.6212265046516043, "grad_norm": 1.7259667073602023, "learning_rate": 8.845315641736929e-07, "log_odds_chosen": 1.641992211341858, "log_odds_ratio": -0.3727050721645355, "logits/chosen": -1.218359351158142, "logits/rejected": -1.0654296875, "logps/chosen": -0.6875, "logps/rejected": -1.8875000476837158, "loss": 0.876, "nll_loss": 0.8192383050918579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06876220554113388, "rewards/margins": 0.11977539211511612, "rewards/rejected": -0.18862304091453552, "step": 8180 }, { "epoch": 0.6219859502563129, "grad_norm": 1.8394385807617337, "learning_rate": 8.839913921757278e-07, "log_odds_chosen": 1.441503882408142, "log_odds_ratio": -0.4401611387729645, "logits/chosen": -1.2390625476837158, "logits/rejected": -1.086328148841858, "logps/chosen": -0.678417980670929, "logps/rejected": -1.697265625, "loss": 0.8675, "nll_loss": 0.862109363079071, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06784667819738388, "rewards/margins": 0.10177002102136612, "rewards/rejected": -0.16958007216453552, "step": 8190 }, { "epoch": 0.6227453958610214, "grad_norm": 1.925114646885691, "learning_rate": 8.834522085987722e-07, "log_odds_chosen": 1.4147460460662842, "log_odds_ratio": -0.3915771543979645, "logits/chosen": -1.2726562023162842, "logits/rejected": -1.084375023841858, "logps/chosen": -0.6722656488418579, "logps/rejected": -1.656640648841858, "loss": 0.859, "nll_loss": 0.787109375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06724853813648224, "rewards/margins": 0.09844970703125, "rewards/rejected": -0.16567382216453552, "step": 8200 }, { "epoch": 0.62350484146573, "grad_norm": 2.0048253738724298, "learning_rate": 8.829140104321008e-07, "log_odds_chosen": 1.2380859851837158, "log_odds_ratio": -0.4434570372104645, "logits/chosen": -1.2931640148162842, "logits/rejected": -1.0615234375, "logps/chosen": -0.703906238079071, "logps/rejected": -1.5890624523162842, "loss": 0.868, "nll_loss": 0.7749999761581421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07044677436351776, "rewards/margins": 0.08836670219898224, "rewards/rejected": -0.158935546875, "step": 8210 }, { "epoch": 0.6242642870704386, "grad_norm": 1.7854700757564335, "learning_rate": 8.82376794677811e-07, "log_odds_chosen": 1.6162109375, "log_odds_ratio": -0.37333983182907104, "logits/chosen": -1.238867163658142, "logits/rejected": -1.0544922351837158, "logps/chosen": -0.671093761920929, "logps/rejected": -1.8562500476837158, "loss": 0.8593, "nll_loss": 0.774609386920929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06711425632238388, "rewards/margins": 0.11869506537914276, "rewards/rejected": -0.18588867783546448, "step": 8220 }, { "epoch": 0.6250237326751471, "grad_norm": 1.7681683631356668, "learning_rate": 8.818405583507537e-07, "log_odds_chosen": 1.5754883289337158, "log_odds_ratio": -0.4116577208042145, "logits/chosen": -1.2130858898162842, "logits/rejected": -1.0263671875, "logps/chosen": -0.6841796636581421, "logps/rejected": -1.8332030773162842, "loss": 0.857, "nll_loss": 0.7699218988418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06837157905101776, "rewards/margins": 0.11491088569164276, "rewards/rejected": -0.18332520127296448, "step": 8230 }, { "epoch": 0.6257831782798557, "grad_norm": 2.2960689988064305, "learning_rate": 8.813052984784634e-07, "log_odds_chosen": 1.4147460460662842, "log_odds_ratio": -0.4583496153354645, "logits/chosen": -1.2314453125, "logits/rejected": -1.045507788658142, "logps/chosen": -0.75830078125, "logps/rejected": -1.8162109851837158, "loss": 0.8433, "nll_loss": 0.8570312261581421, "rewards/accuracies": 0.71875, "rewards/chosen": -0.07586669921875, "rewards/margins": 0.10589752346277237, "rewards/rejected": -0.18168945610523224, "step": 8240 }, { "epoch": 0.6265426238845643, "grad_norm": 2.474750808447357, "learning_rate": 8.807710121010885e-07, "log_odds_chosen": 1.660742163658142, "log_odds_ratio": -0.3642334043979645, "logits/chosen": -1.331445336341858, "logits/rejected": -1.0822265148162842, "logps/chosen": -0.6806640625, "logps/rejected": -1.846093773841858, "loss": 0.8922, "nll_loss": 0.860546886920929, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06801757961511612, "rewards/margins": 0.1165771484375, "rewards/rejected": -0.18466797471046448, "step": 8250 }, { "epoch": 0.6273020694892728, "grad_norm": 1.7205281085768411, "learning_rate": 8.802376962713231e-07, "log_odds_chosen": 1.587499976158142, "log_odds_ratio": -0.3670410215854645, "logits/chosen": -1.3371093273162842, "logits/rejected": -1.1257812976837158, "logps/chosen": -0.6533203125, "logps/rejected": -1.7781250476837158, "loss": 0.8728, "nll_loss": 0.78857421875, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06536865234375, "rewards/margins": 0.11246337741613388, "rewards/rejected": -0.17783203721046448, "step": 8260 }, { "epoch": 0.6280615150939814, "grad_norm": 1.9927208958672193, "learning_rate": 8.797053480543386e-07, "log_odds_chosen": 1.4103515148162842, "log_odds_ratio": -0.38945311307907104, "logits/chosen": -1.2900390625, "logits/rejected": -1.104882836341858, "logps/chosen": -0.65673828125, "logps/rejected": -1.6437499523162842, "loss": 0.8735, "nll_loss": 0.8189452886581421, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06566162407398224, "rewards/margins": 0.09868469089269638, "rewards/rejected": -0.16428223252296448, "step": 8270 }, { "epoch": 0.62882096069869, "grad_norm": 2.4774354529089964, "learning_rate": 8.79173964527716e-07, "log_odds_chosen": 1.4777343273162842, "log_odds_ratio": -0.40380859375, "logits/chosen": -1.2404296398162842, "logits/rejected": -1.105859398841858, "logps/chosen": -0.6602538824081421, "logps/rejected": -1.697851538658142, "loss": 0.8563, "nll_loss": 0.777050793170929, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06602783501148224, "rewards/margins": 0.10378418117761612, "rewards/rejected": -0.16972656548023224, "step": 8280 }, { "epoch": 0.6295804063033985, "grad_norm": 1.8463354191112862, "learning_rate": 8.78643542781378e-07, "log_odds_chosen": 1.5869140625, "log_odds_ratio": -0.4022460877895355, "logits/chosen": -1.199609398841858, "logits/rejected": -1.0234375, "logps/chosen": -0.7212890386581421, "logps/rejected": -1.846093773841858, "loss": 0.8591, "nll_loss": 0.796093761920929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07208251953125, "rewards/margins": 0.11253662407398224, "rewards/rejected": -0.18461914360523224, "step": 8290 }, { "epoch": 0.630339851908107, "grad_norm": 1.5525465501609135, "learning_rate": 8.781140799175228e-07, "log_odds_chosen": 1.619726538658142, "log_odds_ratio": -0.40107423067092896, "logits/chosen": -1.1201171875, "logits/rejected": -0.9388672113418579, "logps/chosen": -0.717968761920929, "logps/rejected": -1.9210937023162842, "loss": 0.8679, "nll_loss": 0.9126952886581421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07163085788488388, "rewards/margins": 0.12054443359375, "rewards/rejected": -0.19206543266773224, "step": 8300 }, { "epoch": 0.6310992975128157, "grad_norm": 1.7145951741703385, "learning_rate": 8.775855730505568e-07, "log_odds_chosen": 1.647558569908142, "log_odds_ratio": -0.3619628846645355, "logits/chosen": -1.169335961341858, "logits/rejected": -1.0615234375, "logps/chosen": -0.662890613079071, "logps/rejected": -1.8212890625, "loss": 0.8886, "nll_loss": 0.8759765625, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.0662841796875, "rewards/margins": 0.11578674614429474, "rewards/rejected": -0.18212890625, "step": 8310 }, { "epoch": 0.6318587431175242, "grad_norm": 2.4758679847088136, "learning_rate": 8.770580193070291e-07, "log_odds_chosen": 1.375, "log_odds_ratio": -0.44658201932907104, "logits/chosen": -1.2414062023162842, "logits/rejected": -1.031835913658142, "logps/chosen": -0.709277331829071, "logps/rejected": -1.745703101158142, "loss": 0.8577, "nll_loss": 0.8861328363418579, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.0709228515625, "rewards/margins": 0.10381164401769638, "rewards/rejected": -0.17460937798023224, "step": 8320 }, { "epoch": 0.6326181887222327, "grad_norm": 2.312365492319721, "learning_rate": 8.765314158255661e-07, "log_odds_chosen": 1.5015137195587158, "log_odds_ratio": -0.3719238340854645, "logits/chosen": -1.090429663658142, "logits/rejected": -0.952343761920929, "logps/chosen": -0.7269531488418579, "logps/rejected": -1.807031273841858, "loss": 0.8894, "nll_loss": 0.844042956829071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07274170219898224, "rewards/margins": 0.10812988132238388, "rewards/rejected": -0.18081054091453552, "step": 8330 }, { "epoch": 0.6333776343269414, "grad_norm": 1.5977000025004726, "learning_rate": 8.760057597568057e-07, "log_odds_chosen": 1.540917992591858, "log_odds_ratio": -0.39702147245407104, "logits/chosen": -1.1818358898162842, "logits/rejected": -1.0431640148162842, "logps/chosen": -0.7216796875, "logps/rejected": -1.865234375, "loss": 0.8404, "nll_loss": 0.7298828363418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07210693508386612, "rewards/margins": 0.11437378078699112, "rewards/rejected": -0.18666991591453552, "step": 8340 }, { "epoch": 0.6341370799316499, "grad_norm": 1.6701776377304396, "learning_rate": 8.754810482633324e-07, "log_odds_chosen": 1.4485352039337158, "log_odds_ratio": -0.406494140625, "logits/chosen": -1.2585937976837158, "logits/rejected": -1.0595703125, "logps/chosen": -0.714550793170929, "logps/rejected": -1.7302734851837158, "loss": 0.8244, "nll_loss": 0.797656238079071, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07147216796875, "rewards/margins": 0.101470947265625, "rewards/rejected": -0.17302246391773224, "step": 8350 }, { "epoch": 0.6348965255363584, "grad_norm": 1.9700718659567844, "learning_rate": 8.749572785196142e-07, "log_odds_chosen": 1.585839867591858, "log_odds_ratio": -0.4032226502895355, "logits/chosen": -1.1962890625, "logits/rejected": -1.037695288658142, "logps/chosen": -0.696484386920929, "logps/rejected": -1.8533203601837158, "loss": 0.852, "nll_loss": 0.872851550579071, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06964111328125, "rewards/margins": 0.11574707180261612, "rewards/rejected": -0.18552246689796448, "step": 8360 }, { "epoch": 0.635655971141067, "grad_norm": 1.5325558486797066, "learning_rate": 8.744344477119373e-07, "log_odds_chosen": 1.5457031726837158, "log_odds_ratio": -0.430908203125, "logits/chosen": -1.2658202648162842, "logits/rejected": -1.0994141101837158, "logps/chosen": -0.7208007574081421, "logps/rejected": -1.8347656726837158, "loss": 0.89, "nll_loss": 0.8033202886581421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07213135063648224, "rewards/margins": 0.11139526218175888, "rewards/rejected": -0.18356934189796448, "step": 8370 }, { "epoch": 0.6364154167457756, "grad_norm": 1.841028881079497, "learning_rate": 8.739125530383433e-07, "log_odds_chosen": 1.48046875, "log_odds_ratio": -0.47563475370407104, "logits/chosen": -1.3212890625, "logits/rejected": -1.032812476158142, "logps/chosen": -0.729199230670929, "logps/rejected": -1.816015601158142, "loss": 0.8492, "nll_loss": 0.832226574420929, "rewards/accuracies": 0.75, "rewards/chosen": -0.07301025092601776, "rewards/margins": 0.1087646484375, "rewards/rejected": -0.181640625, "step": 8380 }, { "epoch": 0.6371748623504842, "grad_norm": 2.128300436109608, "learning_rate": 8.733915917085661e-07, "log_odds_chosen": 1.365014672279358, "log_odds_ratio": -0.45317381620407104, "logits/chosen": -1.194921851158142, "logits/rejected": -1.046875, "logps/chosen": -0.6942383050918579, "logps/rejected": -1.6296875476837158, "loss": 0.8698, "nll_loss": 0.855664074420929, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06938476860523224, "rewards/margins": 0.09358368068933487, "rewards/rejected": -0.1629638671875, "step": 8390 }, { "epoch": 0.6379343079551927, "grad_norm": 1.9414800238817391, "learning_rate": 8.728715609439695e-07, "log_odds_chosen": 1.5486328601837158, "log_odds_ratio": -0.4139160215854645, "logits/chosen": -1.2498047351837158, "logits/rejected": -1.107421875, "logps/chosen": -0.6826171875, "logps/rejected": -1.826562523841858, "loss": 0.8669, "nll_loss": 0.8046875, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06828613579273224, "rewards/margins": 0.11431884765625, "rewards/rejected": -0.18242187798023224, "step": 8400 }, { "epoch": 0.6386937535599013, "grad_norm": 1.6322439353090987, "learning_rate": 8.72352457977484e-07, "log_odds_chosen": 1.642187476158142, "log_odds_ratio": -0.3805175721645355, "logits/chosen": -1.265625, "logits/rejected": -1.049218773841858, "logps/chosen": -0.680371105670929, "logps/rejected": -1.8537108898162842, "loss": 0.8572, "nll_loss": 0.7906249761581421, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06806640326976776, "rewards/margins": 0.11732177436351776, "rewards/rejected": -0.18535156548023224, "step": 8410 }, { "epoch": 0.6394531991646099, "grad_norm": 1.8193062646935996, "learning_rate": 8.718342800535456e-07, "log_odds_chosen": 1.5109374523162842, "log_odds_ratio": -0.4214843809604645, "logits/chosen": -1.2468750476837158, "logits/rejected": -1.10546875, "logps/chosen": -0.646289050579071, "logps/rejected": -1.7472655773162842, "loss": 0.8739, "nll_loss": 0.7906249761581421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06462402641773224, "rewards/margins": 0.11008300632238388, "rewards/rejected": -0.1746826171875, "step": 8420 }, { "epoch": 0.6402126447693184, "grad_norm": 2.080992305795004, "learning_rate": 8.713170244280353e-07, "log_odds_chosen": 1.6323730945587158, "log_odds_ratio": -0.38554686307907104, "logits/chosen": -1.318359375, "logits/rejected": -1.1326172351837158, "logps/chosen": -0.683398425579071, "logps/rejected": -1.8771483898162842, "loss": 0.865, "nll_loss": 0.8187500238418579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06837157905101776, "rewards/margins": 0.11935882270336151, "rewards/rejected": -0.18767090141773224, "step": 8430 }, { "epoch": 0.6409720903740269, "grad_norm": 1.938517561633018, "learning_rate": 8.708006883682162e-07, "log_odds_chosen": 1.683984398841858, "log_odds_ratio": -0.3924804627895355, "logits/chosen": -1.1179687976837158, "logits/rejected": -0.9712890386581421, "logps/chosen": -0.721875011920929, "logps/rejected": -1.943359375, "loss": 0.8708, "nll_loss": 0.8515625, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07221679389476776, "rewards/margins": 0.1219482421875, "rewards/rejected": -0.19401855766773224, "step": 8440 }, { "epoch": 0.6417315359787356, "grad_norm": 1.9757645952958998, "learning_rate": 8.702852691526739e-07, "log_odds_chosen": 1.9064452648162842, "log_odds_ratio": -0.33845216035842896, "logits/chosen": -1.2703125476837158, "logits/rejected": -1.081640601158142, "logps/chosen": -0.6294921636581421, "logps/rejected": -2.0455079078674316, "loss": 0.8469, "nll_loss": 0.775390625, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06291504204273224, "rewards/margins": 0.14177855849266052, "rewards/rejected": -0.20468750596046448, "step": 8450 }, { "epoch": 0.6424909815834441, "grad_norm": 2.0831944028906073, "learning_rate": 8.697707640712562e-07, "log_odds_chosen": 1.372314453125, "log_odds_ratio": -0.4296630918979645, "logits/chosen": -1.2492187023162842, "logits/rejected": -1.0925781726837158, "logps/chosen": -0.691113293170929, "logps/rejected": -1.640234351158142, "loss": 0.8507, "nll_loss": 0.7734375, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06915283203125, "rewards/margins": 0.0949554443359375, "rewards/rejected": -0.16411133110523224, "step": 8460 }, { "epoch": 0.6432504271881526, "grad_norm": 1.6868711217414847, "learning_rate": 8.692571704250135e-07, "log_odds_chosen": 1.641210913658142, "log_odds_ratio": -0.3528808653354645, "logits/chosen": -1.2292969226837158, "logits/rejected": -1.0388672351837158, "logps/chosen": -0.7115234136581421, "logps/rejected": -1.8759765625, "loss": 0.8502, "nll_loss": 0.8739258050918579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.07111816108226776, "rewards/margins": 0.11646118015050888, "rewards/rejected": -0.1876220703125, "step": 8470 }, { "epoch": 0.6440098727928613, "grad_norm": 1.9034260152327824, "learning_rate": 8.687444855261388e-07, "log_odds_chosen": 1.545800805091858, "log_odds_ratio": -0.3990722596645355, "logits/chosen": -1.146875023841858, "logits/rejected": -1.009179711341858, "logps/chosen": -0.691210925579071, "logps/rejected": -1.7785155773162842, "loss": 0.8572, "nll_loss": 0.894726574420929, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06905517727136612, "rewards/margins": 0.10872192680835724, "rewards/rejected": -0.17780761420726776, "step": 8480 }, { "epoch": 0.6447693183975698, "grad_norm": 2.200783069858851, "learning_rate": 8.682327066979084e-07, "log_odds_chosen": 1.566796898841858, "log_odds_ratio": -0.3725341856479645, "logits/chosen": -1.2146484851837158, "logits/rejected": -1.046875, "logps/chosen": -0.709765613079071, "logps/rejected": -1.817968726158142, "loss": 0.8697, "nll_loss": 0.827832043170929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07095947116613388, "rewards/margins": 0.11058349907398224, "rewards/rejected": -0.18159179389476776, "step": 8490 }, { "epoch": 0.6455287640022783, "grad_norm": 2.3454238320351, "learning_rate": 8.677218312746247e-07, "log_odds_chosen": 1.28759765625, "log_odds_ratio": -0.42353516817092896, "logits/chosen": -1.146093726158142, "logits/rejected": -0.9886718988418579, "logps/chosen": -0.7308593988418579, "logps/rejected": -1.608007788658142, "loss": 0.8589, "nll_loss": 0.786328136920929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07301025092601776, "rewards/margins": 0.08779601752758026, "rewards/rejected": -0.16091307997703552, "step": 8500 }, { "epoch": 0.6462882096069869, "grad_norm": 2.7449451573391794, "learning_rate": 8.672118566015558e-07, "log_odds_chosen": 1.5104491710662842, "log_odds_ratio": -0.44038087129592896, "logits/chosen": -1.166601538658142, "logits/rejected": -1.0226562023162842, "logps/chosen": -0.706250011920929, "logps/rejected": -1.8142578601837158, "loss": 0.8822, "nll_loss": 0.847851574420929, "rewards/accuracies": 0.75, "rewards/chosen": -0.0706787109375, "rewards/margins": 0.11084441840648651, "rewards/rejected": -0.18146972358226776, "step": 8510 }, { "epoch": 0.6470476552116955, "grad_norm": 2.0793995099613793, "learning_rate": 8.667027800348789e-07, "log_odds_chosen": 1.5382812023162842, "log_odds_ratio": -0.39228516817092896, "logits/chosen": -1.318750023841858, "logits/rejected": -1.108984351158142, "logps/chosen": -0.67578125, "logps/rejected": -1.780859351158142, "loss": 0.8493, "nll_loss": 0.8072265386581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06749267876148224, "rewards/margins": 0.11052246391773224, "rewards/rejected": -0.17812499403953552, "step": 8520 }, { "epoch": 0.647807100816404, "grad_norm": 1.7718825506170353, "learning_rate": 8.661945989416229e-07, "log_odds_chosen": 1.4892578125, "log_odds_ratio": -0.3880371153354645, "logits/chosen": -1.191015601158142, "logits/rejected": -1.029687523841858, "logps/chosen": -0.663769543170929, "logps/rejected": -1.6875, "loss": 0.8767, "nll_loss": 0.741992175579071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06633301079273224, "rewards/margins": 0.10237427055835724, "rewards/rejected": -0.16887207329273224, "step": 8530 }, { "epoch": 0.6485665464211126, "grad_norm": 1.7788454347691889, "learning_rate": 8.6568731069961e-07, "log_odds_chosen": 1.709375023841858, "log_odds_ratio": -0.3609619140625, "logits/chosen": -1.313085913658142, "logits/rejected": -1.072265625, "logps/chosen": -0.7318359613418579, "logps/rejected": -1.985937476158142, "loss": 0.8819, "nll_loss": 0.8111327886581421, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.07318115234375, "rewards/margins": 0.12542724609375, "rewards/rejected": -0.1986083984375, "step": 8540 }, { "epoch": 0.6493259920258212, "grad_norm": 1.8318414881179026, "learning_rate": 8.651809126974002e-07, "log_odds_chosen": 1.5849609375, "log_odds_ratio": -0.391845703125, "logits/chosen": -1.1736328601837158, "logits/rejected": -0.974609375, "logps/chosen": -0.7099609375, "logps/rejected": -1.873437523841858, "loss": 0.8865, "nll_loss": 0.86328125, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07093505561351776, "rewards/margins": 0.11641845852136612, "rewards/rejected": -0.1873779296875, "step": 8550 }, { "epoch": 0.6500854376305297, "grad_norm": 1.7158667196552355, "learning_rate": 8.646754023342339e-07, "log_odds_chosen": 1.4396483898162842, "log_odds_ratio": -0.40571290254592896, "logits/chosen": -1.220312476158142, "logits/rejected": -1.060937523841858, "logps/chosen": -0.6778320074081421, "logps/rejected": -1.6765625476837158, "loss": 0.864, "nll_loss": 0.795605480670929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06773681938648224, "rewards/margins": 0.09991455078125, "rewards/rejected": -0.16770020127296448, "step": 8560 }, { "epoch": 0.6508448832352383, "grad_norm": 1.757240313788162, "learning_rate": 8.64170777019976e-07, "log_odds_chosen": 1.5543944835662842, "log_odds_ratio": -0.38984376192092896, "logits/chosen": -1.220703125, "logits/rejected": -1.058984398841858, "logps/chosen": -0.6786133050918579, "logps/rejected": -1.823632836341858, "loss": 0.8533, "nll_loss": 0.7416015863418579, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06789550930261612, "rewards/margins": 0.11446533352136612, "rewards/rejected": -0.18222656846046448, "step": 8570 }, { "epoch": 0.6516043288399468, "grad_norm": 2.04687835577521, "learning_rate": 8.636670341750609e-07, "log_odds_chosen": 1.5525391101837158, "log_odds_ratio": -0.36250001192092896, "logits/chosen": -1.208984375, "logits/rejected": -0.9593750238418579, "logps/chosen": -0.688183605670929, "logps/rejected": -1.790429711341858, "loss": 0.8496, "nll_loss": 0.796679675579071, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06873778998851776, "rewards/margins": 0.11025390774011612, "rewards/rejected": -0.17900390923023224, "step": 8580 }, { "epoch": 0.6523637744446554, "grad_norm": 1.8648237495717304, "learning_rate": 8.631641712304359e-07, "log_odds_chosen": 1.51416015625, "log_odds_ratio": -0.41582030057907104, "logits/chosen": -1.1593749523162842, "logits/rejected": -1.0066406726837158, "logps/chosen": -0.683300793170929, "logps/rejected": -1.7693359851837158, "loss": 0.8597, "nll_loss": 0.79296875, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06831054389476776, "rewards/margins": 0.10870361328125, "rewards/rejected": -0.17705078423023224, "step": 8590 }, { "epoch": 0.653123220049364, "grad_norm": 1.828861724513819, "learning_rate": 8.626621856275073e-07, "log_odds_chosen": 1.6612548828125, "log_odds_ratio": -0.40510255098342896, "logits/chosen": -1.305273413658142, "logits/rejected": -1.1365234851837158, "logps/chosen": -0.719921886920929, "logps/rejected": -1.9285156726837158, "loss": 0.8509, "nll_loss": 0.8246093988418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07199706882238388, "rewards/margins": 0.12072525173425674, "rewards/rejected": -0.19294433295726776, "step": 8600 }, { "epoch": 0.6538826656540725, "grad_norm": 1.9816558535477535, "learning_rate": 8.621610748180847e-07, "log_odds_chosen": 1.2704956531524658, "log_odds_ratio": -0.4281249940395355, "logits/chosen": -1.1984374523162842, "logits/rejected": -1.039453148841858, "logps/chosen": -0.7490234375, "logps/rejected": -1.681249976158142, "loss": 0.8542, "nll_loss": 0.7724609375, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07487793266773224, "rewards/margins": 0.09312133491039276, "rewards/rejected": -0.16796875, "step": 8610 }, { "epoch": 0.6546421112587811, "grad_norm": 2.2305999788317648, "learning_rate": 8.616608362643274e-07, "log_odds_chosen": 1.429785132408142, "log_odds_ratio": -0.42963868379592896, "logits/chosen": -1.231835961341858, "logits/rejected": -1.056249976158142, "logps/chosen": -0.758593738079071, "logps/rejected": -1.826171875, "loss": 0.8448, "nll_loss": 0.834765613079071, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.07585449516773224, "rewards/margins": 0.10676269233226776, "rewards/rejected": -0.18269042670726776, "step": 8620 }, { "epoch": 0.6554015568634897, "grad_norm": 1.7762083461404181, "learning_rate": 8.611614674386904e-07, "log_odds_chosen": 1.419677734375, "log_odds_ratio": -0.4224609434604645, "logits/chosen": -1.1943359375, "logits/rejected": -1.001953125, "logps/chosen": -0.7627929449081421, "logps/rejected": -1.807226538658142, "loss": 0.8633, "nll_loss": 0.8902343511581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07636718451976776, "rewards/margins": 0.10434570163488388, "rewards/rejected": -0.18063965439796448, "step": 8630 }, { "epoch": 0.6561610024681982, "grad_norm": 1.7952325999139542, "learning_rate": 8.606629658238703e-07, "log_odds_chosen": 1.6124999523162842, "log_odds_ratio": -0.43671876192092896, "logits/chosen": -1.162109375, "logits/rejected": -1.0378906726837158, "logps/chosen": -0.7515624761581421, "logps/rejected": -1.9763672351837158, "loss": 0.8704, "nll_loss": 0.813281238079071, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07513427734375, "rewards/margins": 0.12245483696460724, "rewards/rejected": -0.19746093451976776, "step": 8640 }, { "epoch": 0.6569204480729067, "grad_norm": 1.7246285126478962, "learning_rate": 8.601653289127525e-07, "log_odds_chosen": 1.380761742591858, "log_odds_ratio": -0.4205566346645355, "logits/chosen": -1.2371094226837158, "logits/rejected": -1.054296851158142, "logps/chosen": -0.713671863079071, "logps/rejected": -1.721093773841858, "loss": 0.8647, "nll_loss": 0.861132800579071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07138671725988388, "rewards/margins": 0.100799560546875, "rewards/rejected": -0.17214354872703552, "step": 8650 }, { "epoch": 0.6576798936776154, "grad_norm": 2.0546348540997754, "learning_rate": 8.596685542083577e-07, "log_odds_chosen": 1.685546875, "log_odds_ratio": -0.3949218690395355, "logits/chosen": -1.326171875, "logits/rejected": -1.0681641101837158, "logps/chosen": -0.718554675579071, "logps/rejected": -1.9874999523162842, "loss": 0.8603, "nll_loss": 0.744140625, "rewards/accuracies": 0.75, "rewards/chosen": -0.07182617485523224, "rewards/margins": 0.12697753310203552, "rewards/rejected": -0.19877929985523224, "step": 8660 }, { "epoch": 0.6584393392823239, "grad_norm": 2.4582125955319496, "learning_rate": 8.591726392237899e-07, "log_odds_chosen": 1.7724609375, "log_odds_ratio": -0.34111326932907104, "logits/chosen": -1.180078148841858, "logits/rejected": -1.0388672351837158, "logps/chosen": -0.671875, "logps/rejected": -1.954687476158142, "loss": 0.826, "nll_loss": 0.80126953125, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.06715087592601776, "rewards/margins": 0.12841796875, "rewards/rejected": -0.1954345703125, "step": 8670 }, { "epoch": 0.6591987848870324, "grad_norm": 1.8266202177328659, "learning_rate": 8.586775814821837e-07, "log_odds_chosen": 1.1981689929962158, "log_odds_ratio": -0.47416990995407104, "logits/chosen": -1.294921875, "logits/rejected": -1.1335937976837158, "logps/chosen": -0.7176758050918579, "logps/rejected": -1.579492211341858, "loss": 0.8575, "nll_loss": 0.846484363079071, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07174072414636612, "rewards/margins": 0.08611603081226349, "rewards/rejected": -0.15793457627296448, "step": 8680 }, { "epoch": 0.6599582304917411, "grad_norm": 1.7609457416954775, "learning_rate": 8.58183378516652e-07, "log_odds_chosen": 1.5400390625, "log_odds_ratio": -0.3751464784145355, "logits/chosen": -1.280664086341858, "logits/rejected": -1.0705077648162842, "logps/chosen": -0.633496105670929, "logps/rejected": -1.6873047351837158, "loss": 0.8561, "nll_loss": 0.745800793170929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06336669623851776, "rewards/margins": 0.10533447563648224, "rewards/rejected": -0.16862793266773224, "step": 8690 }, { "epoch": 0.6607176760964496, "grad_norm": 3.223603770263842, "learning_rate": 8.576900278702358e-07, "log_odds_chosen": 1.6416015625, "log_odds_ratio": -0.3489013612270355, "logits/chosen": -1.247460961341858, "logits/rejected": -0.996874988079071, "logps/chosen": -0.703417956829071, "logps/rejected": -1.888085961341858, "loss": 0.86, "nll_loss": 0.8921874761581421, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07033691555261612, "rewards/margins": 0.11851348727941513, "rewards/rejected": -0.18881836533546448, "step": 8700 }, { "epoch": 0.6614771217011581, "grad_norm": 1.8785772392339022, "learning_rate": 8.57197527095851e-07, "log_odds_chosen": 1.3074219226837158, "log_odds_ratio": -0.44257813692092896, "logits/chosen": -1.2283203601837158, "logits/rejected": -1.105078101158142, "logps/chosen": -0.7110351324081421, "logps/rejected": -1.6707031726837158, "loss": 0.8295, "nll_loss": 0.7764648199081421, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07109375298023224, "rewards/margins": 0.09604492038488388, "rewards/rejected": -0.16716308891773224, "step": 8710 }, { "epoch": 0.6622365673058667, "grad_norm": 2.019694234201137, "learning_rate": 8.567058737562385e-07, "log_odds_chosen": 1.404394507408142, "log_odds_ratio": -0.47314453125, "logits/chosen": -1.283593773841858, "logits/rejected": -1.070898413658142, "logps/chosen": -0.768847644329071, "logps/rejected": -1.801171898841858, "loss": 0.8682, "nll_loss": 0.7806640863418579, "rewards/accuracies": 0.75, "rewards/chosen": -0.07689209282398224, "rewards/margins": 0.10318603366613388, "rewards/rejected": -0.18000487983226776, "step": 8720 }, { "epoch": 0.6629960129105753, "grad_norm": 1.8103234810397246, "learning_rate": 8.562150654239141e-07, "log_odds_chosen": 1.585546851158142, "log_odds_ratio": -0.4041503965854645, "logits/chosen": -1.249414086341858, "logits/rejected": -1.077539086341858, "logps/chosen": -0.7012695074081421, "logps/rejected": -1.876953125, "loss": 0.8582, "nll_loss": 0.805859386920929, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07016601413488388, "rewards/margins": 0.11749877780675888, "rewards/rejected": -0.18759766221046448, "step": 8730 }, { "epoch": 0.6637554585152838, "grad_norm": 1.7216869039288665, "learning_rate": 8.55725099681116e-07, "log_odds_chosen": 1.1668822765350342, "log_odds_ratio": -0.4730468690395355, "logits/chosen": -1.2228515148162842, "logits/rejected": -1.0685546398162842, "logps/chosen": -0.707324206829071, "logps/rejected": -1.563867211341858, "loss": 0.8523, "nll_loss": 0.8125976324081421, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07069091498851776, "rewards/margins": 0.08548202365636826, "rewards/rejected": -0.15622559189796448, "step": 8740 }, { "epoch": 0.6645149041199924, "grad_norm": 1.8356730438715008, "learning_rate": 8.552359741197579e-07, "log_odds_chosen": 1.6085937023162842, "log_odds_ratio": -0.3914550840854645, "logits/chosen": -1.240234375, "logits/rejected": -1.063085913658142, "logps/chosen": -0.700390636920929, "logps/rejected": -1.885156273841858, "loss": 0.8756, "nll_loss": 0.8265625238418579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07009277492761612, "rewards/margins": 0.11851195991039276, "rewards/rejected": -0.18857422471046448, "step": 8750 }, { "epoch": 0.665274349724701, "grad_norm": 2.013490070453226, "learning_rate": 8.547476863413765e-07, "log_odds_chosen": 1.6863281726837158, "log_odds_ratio": -0.41425782442092896, "logits/chosen": -1.2482421398162842, "logits/rejected": -1.0802733898162842, "logps/chosen": -0.7109375, "logps/rejected": -1.9386718273162842, "loss": 0.8342, "nll_loss": 0.7884765863418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07115478813648224, "rewards/margins": 0.12282714992761612, "rewards/rejected": -0.19404296576976776, "step": 8760 }, { "epoch": 0.6660337953294095, "grad_norm": 2.3679019713184477, "learning_rate": 8.54260233957083e-07, "log_odds_chosen": 1.3140380382537842, "log_odds_ratio": -0.39716798067092896, "logits/chosen": -1.307226538658142, "logits/rejected": -1.125390648841858, "logps/chosen": -0.666308581829071, "logps/rejected": -1.5642578601837158, "loss": 0.8527, "nll_loss": 0.7774413824081421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06666259467601776, "rewards/margins": 0.08972778171300888, "rewards/rejected": -0.15639647841453552, "step": 8770 }, { "epoch": 0.6667932409341181, "grad_norm": 1.511706887083758, "learning_rate": 8.537736145875154e-07, "log_odds_chosen": 1.5632812976837158, "log_odds_ratio": -0.3960937559604645, "logits/chosen": -1.2166016101837158, "logits/rejected": -1.085546851158142, "logps/chosen": -0.7216796875, "logps/rejected": -1.8703124523162842, "loss": 0.8474, "nll_loss": 0.880175769329071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07221679389476776, "rewards/margins": 0.11490478366613388, "rewards/rejected": -0.18706054985523224, "step": 8780 }, { "epoch": 0.6675526865388267, "grad_norm": 2.1956758252615645, "learning_rate": 8.532878258627874e-07, "log_odds_chosen": 1.6017577648162842, "log_odds_ratio": -0.3897460997104645, "logits/chosen": -1.1916015148162842, "logits/rejected": -1.039648413658142, "logps/chosen": -0.677734375, "logps/rejected": -1.87109375, "loss": 0.8554, "nll_loss": 0.806640625, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.0677490234375, "rewards/margins": 0.11925049126148224, "rewards/rejected": -0.18706054985523224, "step": 8790 }, { "epoch": 0.6683121321435352, "grad_norm": 1.9718947543088248, "learning_rate": 8.528028654224416e-07, "log_odds_chosen": 1.294824242591858, "log_odds_ratio": -0.4576660096645355, "logits/chosen": -1.173828125, "logits/rejected": -1.037109375, "logps/chosen": -0.7769531011581421, "logps/rejected": -1.7138671875, "loss": 0.8542, "nll_loss": 0.8177734613418579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07774658501148224, "rewards/margins": 0.0937095656991005, "rewards/rejected": -0.17148438096046448, "step": 8800 }, { "epoch": 0.6690715777482438, "grad_norm": 1.6039782287741153, "learning_rate": 8.523187309154008e-07, "log_odds_chosen": 1.7234375476837158, "log_odds_ratio": -0.35053712129592896, "logits/chosen": -1.2097656726837158, "logits/rejected": -1.0626952648162842, "logps/chosen": -0.628222644329071, "logps/rejected": -1.842187523841858, "loss": 0.8644, "nll_loss": 0.8275390863418579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06280517578125, "rewards/margins": 0.12144775688648224, "rewards/rejected": -0.18425293266773224, "step": 8810 }, { "epoch": 0.6698310233529523, "grad_norm": 2.1538447837372288, "learning_rate": 8.518354199999198e-07, "log_odds_chosen": 1.583593726158142, "log_odds_ratio": -0.3662109375, "logits/chosen": -1.3312499523162842, "logits/rejected": -1.117578148841858, "logps/chosen": -0.6786133050918579, "logps/rejected": -1.8171875476837158, "loss": 0.8502, "nll_loss": 0.82421875, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06778564304113388, "rewards/margins": 0.11392822116613388, "rewards/rejected": -0.18183593451976776, "step": 8820 }, { "epoch": 0.670590468957661, "grad_norm": 1.8619568560512132, "learning_rate": 8.513529303435386e-07, "log_odds_chosen": 1.8123047351837158, "log_odds_ratio": -0.38164061307907104, "logits/chosen": -1.1740233898162842, "logits/rejected": -1.0539062023162842, "logps/chosen": -0.689257800579071, "logps/rejected": -2.055468797683716, "loss": 0.8404, "nll_loss": 0.784375011920929, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06890869140625, "rewards/margins": 0.13663940131664276, "rewards/rejected": -0.20559081435203552, "step": 8830 }, { "epoch": 0.6713499145623695, "grad_norm": 2.6324728422293795, "learning_rate": 8.50871259623034e-07, "log_odds_chosen": 1.256933569908142, "log_odds_ratio": -0.45698243379592896, "logits/chosen": -1.1134765148162842, "logits/rejected": -1.021484375, "logps/chosen": -0.705859363079071, "logps/rejected": -1.578515648841858, "loss": 0.8611, "nll_loss": 0.778124988079071, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07056884467601776, "rewards/margins": 0.08726348727941513, "rewards/rejected": -0.15776367485523224, "step": 8840 }, { "epoch": 0.672109360167078, "grad_norm": 1.556077038911895, "learning_rate": 8.503904055243742e-07, "log_odds_chosen": 1.58984375, "log_odds_ratio": -0.37451171875, "logits/chosen": -1.173242211341858, "logits/rejected": -1.030859351158142, "logps/chosen": -0.639355480670929, "logps/rejected": -1.771484375, "loss": 0.8607, "nll_loss": 0.7972656488418579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06391601264476776, "rewards/margins": 0.11323241889476776, "rewards/rejected": -0.17705078423023224, "step": 8850 }, { "epoch": 0.6728688057717866, "grad_norm": 1.8669358811429273, "learning_rate": 8.499103657426704e-07, "log_odds_chosen": 1.6242187023162842, "log_odds_ratio": -0.35810548067092896, "logits/chosen": -1.1564452648162842, "logits/rejected": -1.0271484851837158, "logps/chosen": -0.662109375, "logps/rejected": -1.802734375, "loss": 0.8439, "nll_loss": 0.7662109136581421, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06621094048023224, "rewards/margins": 0.11405029147863388, "rewards/rejected": -0.1802978515625, "step": 8860 }, { "epoch": 0.6736282513764952, "grad_norm": 1.7940840380774576, "learning_rate": 8.494311379821314e-07, "log_odds_chosen": 1.455957055091858, "log_odds_ratio": -0.38525390625, "logits/chosen": -1.277734398841858, "logits/rejected": -1.1222655773162842, "logps/chosen": -0.6900390386581421, "logps/rejected": -1.732421875, "loss": 0.8554, "nll_loss": 0.7857421636581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06899414211511612, "rewards/margins": 0.10431823879480362, "rewards/rejected": -0.17338867485523224, "step": 8870 }, { "epoch": 0.6743876969812037, "grad_norm": 1.6842863023537538, "learning_rate": 8.489527199560178e-07, "log_odds_chosen": 1.3297119140625, "log_odds_ratio": -0.44189453125, "logits/chosen": -1.3359375, "logits/rejected": -1.181054711341858, "logps/chosen": -0.729199230670929, "logps/rejected": -1.685156226158142, "loss": 0.8553, "nll_loss": 0.827343761920929, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07291259616613388, "rewards/margins": 0.09567108005285263, "rewards/rejected": -0.1685791015625, "step": 8880 }, { "epoch": 0.6751471425859122, "grad_norm": 2.0811647525581964, "learning_rate": 8.484751093865948e-07, "log_odds_chosen": 1.6076171398162842, "log_odds_ratio": -0.40625, "logits/chosen": -1.276757836341858, "logits/rejected": -1.1365234851837158, "logps/chosen": -0.7373046875, "logps/rejected": -1.987695336341858, "loss": 0.8935, "nll_loss": 0.821093738079071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07369384914636612, "rewards/margins": 0.12492065131664276, "rewards/rejected": -0.1986083984375, "step": 8890 }, { "epoch": 0.6759065881906209, "grad_norm": 1.84698604747761, "learning_rate": 8.47998304005088e-07, "log_odds_chosen": 1.350195288658142, "log_odds_ratio": -0.45268553495407104, "logits/chosen": -1.1818358898162842, "logits/rejected": -1.0226562023162842, "logps/chosen": -0.6884765625, "logps/rejected": -1.652929663658142, "loss": 0.8594, "nll_loss": 0.815625011920929, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06883545219898224, "rewards/margins": 0.096435546875, "rewards/rejected": -0.16520996391773224, "step": 8900 }, { "epoch": 0.6766660337953294, "grad_norm": 1.9668100900950947, "learning_rate": 8.475223015516377e-07, "log_odds_chosen": 1.2578125, "log_odds_ratio": -0.458984375, "logits/chosen": -1.309960961341858, "logits/rejected": -1.111328125, "logps/chosen": -0.7037109136581421, "logps/rejected": -1.603124976158142, "loss": 0.8506, "nll_loss": 0.8187500238418579, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07038573920726776, "rewards/margins": 0.08997802436351776, "rewards/rejected": -0.16025391221046448, "step": 8910 }, { "epoch": 0.6774254794000379, "grad_norm": 1.6052095705161575, "learning_rate": 8.470470997752534e-07, "log_odds_chosen": 1.349023461341858, "log_odds_ratio": -0.4310546815395355, "logits/chosen": -1.2498047351837158, "logits/rejected": -1.0974609851837158, "logps/chosen": -0.6962890625, "logps/rejected": -1.6474609375, "loss": 0.8787, "nll_loss": 0.857226550579071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06962890923023224, "rewards/margins": 0.09519042819738388, "rewards/rejected": -0.16472168266773224, "step": 8920 }, { "epoch": 0.6781849250047466, "grad_norm": 1.913868070936609, "learning_rate": 8.465726964337702e-07, "log_odds_chosen": 1.6296875476837158, "log_odds_ratio": -0.3970703184604645, "logits/chosen": -1.3125, "logits/rejected": -1.053125023841858, "logps/chosen": -0.734570324420929, "logps/rejected": -1.924218773841858, "loss": 0.8579, "nll_loss": 0.816113293170929, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07351074367761612, "rewards/margins": 0.11907958984375, "rewards/rejected": -0.19252929091453552, "step": 8930 }, { "epoch": 0.6789443706094551, "grad_norm": 2.0322293410439083, "learning_rate": 8.460990892938031e-07, "log_odds_chosen": 1.5529296398162842, "log_odds_ratio": -0.4059814512729645, "logits/chosen": -1.2830078601837158, "logits/rejected": -1.0978515148162842, "logps/chosen": -0.67138671875, "logps/rejected": -1.784765601158142, "loss": 0.8573, "nll_loss": 0.804882824420929, "rewards/accuracies": 0.75, "rewards/chosen": -0.06706543266773224, "rewards/margins": 0.11127929389476776, "rewards/rejected": -0.17844238877296448, "step": 8940 }, { "epoch": 0.6797038162141636, "grad_norm": 2.3164694606069776, "learning_rate": 8.456262761307038e-07, "log_odds_chosen": 1.4230468273162842, "log_odds_ratio": -0.3865722715854645, "logits/chosen": -1.1462891101837158, "logits/rejected": -1.047265648841858, "logps/chosen": -0.653027355670929, "logps/rejected": -1.6707031726837158, "loss": 0.876, "nll_loss": 0.816210925579071, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06533203274011612, "rewards/margins": 0.10169677436351776, "rewards/rejected": -0.16706542670726776, "step": 8950 }, { "epoch": 0.6804632618188722, "grad_norm": 1.886625921509163, "learning_rate": 8.451542547285166e-07, "log_odds_chosen": 1.287011742591858, "log_odds_ratio": -0.4500976502895355, "logits/chosen": -1.294921875, "logits/rejected": -1.0867187976837158, "logps/chosen": -0.713085949420929, "logps/rejected": -1.6015625, "loss": 0.8575, "nll_loss": 0.8218749761581421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07126464694738388, "rewards/margins": 0.08902435004711151, "rewards/rejected": -0.16032715141773224, "step": 8960 }, { "epoch": 0.6812227074235808, "grad_norm": 2.336527453632272, "learning_rate": 8.44683022879934e-07, "log_odds_chosen": 1.3084228038787842, "log_odds_ratio": -0.4517578184604645, "logits/chosen": -1.293359398841858, "logits/rejected": -1.0791015625, "logps/chosen": -0.6878906488418579, "logps/rejected": -1.6248047351837158, "loss": 0.8566, "nll_loss": 0.8636718988418579, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06881103664636612, "rewards/margins": 0.09365234524011612, "rewards/rejected": -0.16242675483226776, "step": 8970 }, { "epoch": 0.6819821530282894, "grad_norm": 1.8615355347707325, "learning_rate": 8.442125783862544e-07, "log_odds_chosen": 1.509765625, "log_odds_ratio": -0.371337890625, "logits/chosen": -1.310156226158142, "logits/rejected": -1.081445336341858, "logps/chosen": -0.702929675579071, "logps/rejected": -1.7667968273162842, "loss": 0.8266, "nll_loss": 0.857128918170929, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07034911960363388, "rewards/margins": 0.10633544623851776, "rewards/rejected": -0.1766357421875, "step": 8980 }, { "epoch": 0.6827415986329979, "grad_norm": 1.8348565016949345, "learning_rate": 8.437429190573388e-07, "log_odds_chosen": 1.6455078125, "log_odds_ratio": -0.337158203125, "logits/chosen": -1.335351586341858, "logits/rejected": -1.107812523841858, "logps/chosen": -0.645312488079071, "logps/rejected": -1.769140601158142, "loss": 0.8411, "nll_loss": 0.770703136920929, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.06450195610523224, "rewards/margins": 0.11247558891773224, "rewards/rejected": -0.1768798828125, "step": 8990 }, { "epoch": 0.6835010442377065, "grad_norm": 2.4401841705393816, "learning_rate": 8.432740427115678e-07, "log_odds_chosen": 1.5046875476837158, "log_odds_ratio": -0.40644532442092896, "logits/chosen": -1.318945288658142, "logits/rejected": -1.121679663658142, "logps/chosen": -0.7242187261581421, "logps/rejected": -1.813867211341858, "loss": 0.8431, "nll_loss": 0.7943359613418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07239989936351776, "rewards/margins": 0.10901489108800888, "rewards/rejected": -0.18144531548023224, "step": 9000 }, { "epoch": 0.684260489842415, "grad_norm": 2.5303364241033504, "learning_rate": 8.428059471757984e-07, "log_odds_chosen": 1.514257788658142, "log_odds_ratio": -0.40092772245407104, "logits/chosen": -1.180078148841858, "logits/rejected": -1.008203148841858, "logps/chosen": -0.6644531488418579, "logps/rejected": -1.7306640148162842, "loss": 0.8387, "nll_loss": 0.806835949420929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06646728515625, "rewards/margins": 0.10664062201976776, "rewards/rejected": -0.17302246391773224, "step": 9010 }, { "epoch": 0.6850199354471236, "grad_norm": 5.62495236320242, "learning_rate": 8.423386302853226e-07, "log_odds_chosen": 1.5607421398162842, "log_odds_ratio": -0.3729492127895355, "logits/chosen": -1.177734375, "logits/rejected": -1.0076172351837158, "logps/chosen": -0.70556640625, "logps/rejected": -1.8523437976837158, "loss": 0.8492, "nll_loss": 0.7992187738418579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.070556640625, "rewards/margins": 0.11466064304113388, "rewards/rejected": -0.18525390326976776, "step": 9020 }, { "epoch": 0.6857793810518321, "grad_norm": 1.6905604228252527, "learning_rate": 8.418720898838254e-07, "log_odds_chosen": 1.6921875476837158, "log_odds_ratio": -0.3948730528354645, "logits/chosen": -1.2033202648162842, "logits/rejected": -1.014062523841858, "logps/chosen": -0.7183593511581421, "logps/rejected": -2.003124952316284, "loss": 0.8532, "nll_loss": 0.892578125, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07185058295726776, "rewards/margins": 0.12843017280101776, "rewards/rejected": -0.20009765028953552, "step": 9030 }, { "epoch": 0.6865388266565408, "grad_norm": 1.815273120814082, "learning_rate": 8.414063238233425e-07, "log_odds_chosen": 1.4768555164337158, "log_odds_ratio": -0.3927246034145355, "logits/chosen": -1.372656226158142, "logits/rejected": -1.1083984375, "logps/chosen": -0.6763671636581421, "logps/rejected": -1.754296898841858, "loss": 0.8545, "nll_loss": 0.8705078363418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06760253757238388, "rewards/margins": 0.10790710151195526, "rewards/rejected": -0.17548827826976776, "step": 9040 }, { "epoch": 0.6872982722612493, "grad_norm": 1.7493903538914637, "learning_rate": 8.409413299642188e-07, "log_odds_chosen": 1.546972632408142, "log_odds_ratio": -0.3753418028354645, "logits/chosen": -1.280664086341858, "logits/rejected": -1.067968726158142, "logps/chosen": -0.645214855670929, "logps/rejected": -1.740234375, "loss": 0.8732, "nll_loss": 0.719042956829071, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06456299126148224, "rewards/margins": 0.10957030951976776, "rewards/rejected": -0.17414550483226776, "step": 9050 }, { "epoch": 0.6880577178659578, "grad_norm": 1.9074708092997414, "learning_rate": 8.404771061750672e-07, "log_odds_chosen": 1.358129858970642, "log_odds_ratio": -0.464111328125, "logits/chosen": -1.3125, "logits/rejected": -1.126953125, "logps/chosen": -0.726757824420929, "logps/rejected": -1.70703125, "loss": 0.825, "nll_loss": 0.852734386920929, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0726318359375, "rewards/margins": 0.09812621772289276, "rewards/rejected": -0.17080077528953552, "step": 9060 }, { "epoch": 0.6888171634706665, "grad_norm": 2.5343641776403545, "learning_rate": 8.400136503327277e-07, "log_odds_chosen": 1.6789062023162842, "log_odds_ratio": -0.33955079317092896, "logits/chosen": -1.261328101158142, "logits/rejected": -1.052734375, "logps/chosen": -0.658398449420929, "logps/rejected": -1.8585937023162842, "loss": 0.8595, "nll_loss": 0.823925793170929, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.06588134914636612, "rewards/margins": 0.12004394829273224, "rewards/rejected": -0.18571777641773224, "step": 9070 }, { "epoch": 0.689576609075375, "grad_norm": 1.7037240039640928, "learning_rate": 8.395509603222271e-07, "log_odds_chosen": 1.6989257335662842, "log_odds_ratio": -0.3265624940395355, "logits/chosen": -1.2470703125, "logits/rejected": -1.021093726158142, "logps/chosen": -0.6700195074081421, "logps/rejected": -1.8835937976837158, "loss": 0.8397, "nll_loss": 0.811816394329071, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06699218600988388, "rewards/margins": 0.12136230617761612, "rewards/rejected": -0.18837890028953552, "step": 9080 }, { "epoch": 0.6903360546800835, "grad_norm": 1.9941967760617403, "learning_rate": 8.390890340367368e-07, "log_odds_chosen": 1.6064453125, "log_odds_ratio": -0.4085937440395355, "logits/chosen": -1.2097656726837158, "logits/rejected": -1.0031249523162842, "logps/chosen": -0.706347644329071, "logps/rejected": -1.896484375, "loss": 0.8704, "nll_loss": 0.865917980670929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07069091498851776, "rewards/margins": 0.11895751953125, "rewards/rejected": -0.18950195610523224, "step": 9090 }, { "epoch": 0.691095500284792, "grad_norm": 1.7594293263884053, "learning_rate": 8.386278693775346e-07, "log_odds_chosen": 1.868261694908142, "log_odds_ratio": -0.328125, "logits/chosen": -1.222070336341858, "logits/rejected": -1.046289086341858, "logps/chosen": -0.705273449420929, "logps/rejected": -2.073437452316284, "loss": 0.8763, "nll_loss": 0.922656238079071, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07053222507238388, "rewards/margins": 0.13690796494483948, "rewards/rejected": -0.20732422173023224, "step": 9100 }, { "epoch": 0.6918549458895007, "grad_norm": 1.6134931068629057, "learning_rate": 8.381674642539632e-07, "log_odds_chosen": 1.5260741710662842, "log_odds_ratio": -0.4007812440395355, "logits/chosen": -1.2029297351837158, "logits/rejected": -1.016992211341858, "logps/chosen": -0.724609375, "logps/rejected": -1.8015625476837158, "loss": 0.8597, "nll_loss": 0.7930663824081421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.072509765625, "rewards/margins": 0.10764160007238388, "rewards/rejected": -0.18015137314796448, "step": 9110 }, { "epoch": 0.6926143914942092, "grad_norm": 2.024763906281436, "learning_rate": 8.37707816583391e-07, "log_odds_chosen": 1.5656249523162842, "log_odds_ratio": -0.4073730409145355, "logits/chosen": -1.263085961341858, "logits/rejected": -1.102929711341858, "logps/chosen": -0.716015636920929, "logps/rejected": -1.8582031726837158, "loss": 0.8531, "nll_loss": 0.7847656011581421, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.07164306938648224, "rewards/margins": 0.11433105170726776, "rewards/rejected": -0.18598632514476776, "step": 9120 }, { "epoch": 0.6933738370989178, "grad_norm": 2.0171847091201056, "learning_rate": 8.372489242911724e-07, "log_odds_chosen": 1.4443359375, "log_odds_ratio": -0.44487303495407104, "logits/chosen": -1.1710937023162842, "logits/rejected": -1.070898413658142, "logps/chosen": -0.744824230670929, "logps/rejected": -1.8019530773162842, "loss": 0.8283, "nll_loss": 0.839062511920929, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07454834133386612, "rewards/margins": 0.10557861626148224, "rewards/rejected": -0.18012695014476776, "step": 9130 }, { "epoch": 0.6941332827036264, "grad_norm": 2.393740130747864, "learning_rate": 8.367907853106078e-07, "log_odds_chosen": 1.9191405773162842, "log_odds_ratio": -0.32341307401657104, "logits/chosen": -1.231835961341858, "logits/rejected": -1.009374976158142, "logps/chosen": -0.7149413824081421, "logps/rejected": -2.108593702316284, "loss": 0.8263, "nll_loss": 0.7875000238418579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.07145996391773224, "rewards/margins": 0.13937988877296448, "rewards/rejected": -0.21054688096046448, "step": 9140 }, { "epoch": 0.6948927283083349, "grad_norm": 1.6720306286626572, "learning_rate": 8.363333975829066e-07, "log_odds_chosen": 1.7646484375, "log_odds_ratio": -0.33024901151657104, "logits/chosen": -1.244531273841858, "logits/rejected": -1.0499999523162842, "logps/chosen": -0.6219726800918579, "logps/rejected": -1.859765648841858, "loss": 0.8591, "nll_loss": 0.7925781011581421, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.06223144382238388, "rewards/margins": 0.12393798679113388, "rewards/rejected": -0.18603515625, "step": 9150 }, { "epoch": 0.6956521739130435, "grad_norm": 2.239467360839852, "learning_rate": 8.358767590571457e-07, "log_odds_chosen": 1.4714844226837158, "log_odds_ratio": -0.36811524629592896, "logits/chosen": -1.37109375, "logits/rejected": -1.102148413658142, "logps/chosen": -0.730664074420929, "logps/rejected": -1.780859351158142, "loss": 0.8686, "nll_loss": 0.946484386920929, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07303466647863388, "rewards/margins": 0.10495605319738388, "rewards/rejected": -0.17814941704273224, "step": 9160 }, { "epoch": 0.696411619517752, "grad_norm": 1.9844696597106122, "learning_rate": 8.354208676902326e-07, "log_odds_chosen": 1.508203148841858, "log_odds_ratio": -0.40966796875, "logits/chosen": -1.150781273841858, "logits/rejected": -1.0001952648162842, "logps/chosen": -0.6533203125, "logps/rejected": -1.737890601158142, "loss": 0.862, "nll_loss": 0.7816406488418579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06538085639476776, "rewards/margins": 0.10834427177906036, "rewards/rejected": -0.17388916015625, "step": 9170 }, { "epoch": 0.6971710651224606, "grad_norm": 1.7014000855069646, "learning_rate": 8.349657214468659e-07, "log_odds_chosen": 1.5212891101837158, "log_odds_ratio": -0.3971191346645355, "logits/chosen": -1.2546875476837158, "logits/rejected": -1.093359351158142, "logps/chosen": -0.695507824420929, "logps/rejected": -1.8136718273162842, "loss": 0.8485, "nll_loss": 0.7486327886581421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06955566257238388, "rewards/margins": 0.11174316704273224, "rewards/rejected": -0.18132324516773224, "step": 9180 }, { "epoch": 0.6979305107271692, "grad_norm": 1.6421272716329032, "learning_rate": 8.345113182994988e-07, "log_odds_chosen": 1.452539086341858, "log_odds_ratio": -0.41215819120407104, "logits/chosen": -1.2111327648162842, "logits/rejected": -1.017968773841858, "logps/chosen": -0.754687488079071, "logps/rejected": -1.8175780773162842, "loss": 0.8482, "nll_loss": 0.845019519329071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07545165717601776, "rewards/margins": 0.10628662258386612, "rewards/rejected": -0.18171386420726776, "step": 9190 }, { "epoch": 0.6986899563318777, "grad_norm": 2.9418671670365013, "learning_rate": 8.34057656228299e-07, "log_odds_chosen": 1.4816405773162842, "log_odds_ratio": -0.41015625, "logits/chosen": -1.255273461341858, "logits/rejected": -1.115625023841858, "logps/chosen": -0.6834961175918579, "logps/rejected": -1.759179711341858, "loss": 0.8633, "nll_loss": 0.7162109613418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06829833984375, "rewards/margins": 0.10756225883960724, "rewards/rejected": -0.17587891221046448, "step": 9200 }, { "epoch": 0.6994494019365863, "grad_norm": 1.7830250665625063, "learning_rate": 8.336047332211128e-07, "log_odds_chosen": 1.574804663658142, "log_odds_ratio": -0.3819335997104645, "logits/chosen": -1.3093750476837158, "logits/rejected": -1.096093773841858, "logps/chosen": -0.729785144329071, "logps/rejected": -1.8992187976837158, "loss": 0.8327, "nll_loss": 0.8402343988418579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07301025092601776, "rewards/margins": 0.11690063774585724, "rewards/rejected": -0.18994140625, "step": 9210 }, { "epoch": 0.7002088475412949, "grad_norm": 1.699938395595441, "learning_rate": 8.331525472734267e-07, "log_odds_chosen": 1.502832055091858, "log_odds_ratio": -0.42158204317092896, "logits/chosen": -1.2419922351837158, "logits/rejected": -1.103515625, "logps/chosen": -0.6634765863418579, "logps/rejected": -1.7253906726837158, "loss": 0.8317, "nll_loss": 0.7789062261581421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06627197563648224, "rewards/margins": 0.10634765774011612, "rewards/rejected": -0.17265625298023224, "step": 9220 }, { "epoch": 0.7009682931460034, "grad_norm": 1.793341700009259, "learning_rate": 8.327010963883302e-07, "log_odds_chosen": 1.3875000476837158, "log_odds_ratio": -0.42119139432907104, "logits/chosen": -1.388085961341858, "logits/rejected": -1.13671875, "logps/chosen": -0.7523437738418579, "logps/rejected": -1.741796851158142, "loss": 0.8663, "nll_loss": 0.8076171875, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07524414360523224, "rewards/margins": 0.09896240383386612, "rewards/rejected": -0.17426757514476776, "step": 9230 }, { "epoch": 0.7017277387507119, "grad_norm": 1.9236112340860407, "learning_rate": 8.322503785764789e-07, "log_odds_chosen": 1.6037108898162842, "log_odds_ratio": -0.38032227754592896, "logits/chosen": -1.2003905773162842, "logits/rejected": -1.0080077648162842, "logps/chosen": -0.69873046875, "logps/rejected": -1.871484398841858, "loss": 0.8413, "nll_loss": 0.839062511920929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06989745795726776, "rewards/margins": 0.11723633110523224, "rewards/rejected": -0.18720702826976776, "step": 9240 }, { "epoch": 0.7024871843554206, "grad_norm": 2.063473746288346, "learning_rate": 8.318003918560583e-07, "log_odds_chosen": 1.414697289466858, "log_odds_ratio": -0.451171875, "logits/chosen": -1.2664062976837158, "logits/rejected": -1.111328125, "logps/chosen": -0.7393554449081421, "logps/rejected": -1.8019530773162842, "loss": 0.8797, "nll_loss": 0.8392578363418579, "rewards/accuracies": 0.75, "rewards/chosen": -0.07398681342601776, "rewards/margins": 0.10618285834789276, "rewards/rejected": -0.1800537109375, "step": 9250 }, { "epoch": 0.7032466299601291, "grad_norm": 2.0817562133436756, "learning_rate": 8.313511342527453e-07, "log_odds_chosen": 1.449804663658142, "log_odds_ratio": -0.3965820372104645, "logits/chosen": -1.204492211341858, "logits/rejected": -1.038671851158142, "logps/chosen": -0.690625011920929, "logps/rejected": -1.733984351158142, "loss": 0.8465, "nll_loss": 0.79736328125, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06907959282398224, "rewards/margins": 0.10433349758386612, "rewards/rejected": -0.17336425185203552, "step": 9260 }, { "epoch": 0.7040060755648376, "grad_norm": 1.796507712069137, "learning_rate": 8.309026037996745e-07, "log_odds_chosen": 1.662207007408142, "log_odds_ratio": -0.40703123807907104, "logits/chosen": -1.142968773841858, "logits/rejected": -1.0408203601837158, "logps/chosen": -0.6949218511581421, "logps/rejected": -1.9249999523162842, "loss": 0.8443, "nll_loss": 0.815136730670929, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06947021186351776, "rewards/margins": 0.12286682426929474, "rewards/rejected": -0.19252929091453552, "step": 9270 }, { "epoch": 0.7047655211695463, "grad_norm": 2.063076676153399, "learning_rate": 8.304547985373996e-07, "log_odds_chosen": 1.537695288658142, "log_odds_ratio": -0.40229493379592896, "logits/chosen": -1.379492163658142, "logits/rejected": -1.137304663658142, "logps/chosen": -0.6495116949081421, "logps/rejected": -1.7439453601837158, "loss": 0.8315, "nll_loss": 0.775390625, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06490478664636612, "rewards/margins": 0.10950927436351776, "rewards/rejected": -0.17446288466453552, "step": 9280 }, { "epoch": 0.7055249667742548, "grad_norm": 1.6747583604129352, "learning_rate": 8.300077165138592e-07, "log_odds_chosen": 1.8134765625, "log_odds_ratio": -0.36689454317092896, "logits/chosen": -1.155859351158142, "logits/rejected": -0.994921863079071, "logps/chosen": -0.703320324420929, "logps/rejected": -2.076953172683716, "loss": 0.8453, "nll_loss": 0.8333984613418579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07036133110523224, "rewards/margins": 0.13723143935203552, "rewards/rejected": -0.20759277045726776, "step": 9290 }, { "epoch": 0.7062844123789633, "grad_norm": 1.832435769509851, "learning_rate": 8.295613557843402e-07, "log_odds_chosen": 1.724023461341858, "log_odds_ratio": -0.3955078125, "logits/chosen": -1.2550780773162842, "logits/rejected": -1.040429711341858, "logps/chosen": -0.6810547113418579, "logps/rejected": -1.91796875, "loss": 0.8358, "nll_loss": 0.8042968511581421, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06810303032398224, "rewards/margins": 0.12376709282398224, "rewards/rejected": -0.19179686903953552, "step": 9300 }, { "epoch": 0.7070438579836719, "grad_norm": 1.7940835781204116, "learning_rate": 8.291157144114419e-07, "log_odds_chosen": 1.7205078601837158, "log_odds_ratio": -0.38505858182907104, "logits/chosen": -1.258398413658142, "logits/rejected": -1.0773437023162842, "logps/chosen": -0.621386706829071, "logps/rejected": -1.8839843273162842, "loss": 0.8442, "nll_loss": 0.751171886920929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.0621337890625, "rewards/margins": 0.12613525986671448, "rewards/rejected": -0.18828125298023224, "step": 9310 }, { "epoch": 0.7078033035883805, "grad_norm": 2.025978837874131, "learning_rate": 8.286707904650417e-07, "log_odds_chosen": 1.523046851158142, "log_odds_ratio": -0.41704100370407104, "logits/chosen": -1.3203125, "logits/rejected": -1.100976586341858, "logps/chosen": -0.715624988079071, "logps/rejected": -1.7785155773162842, "loss": 0.837, "nll_loss": 0.87109375, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07160644233226776, "rewards/margins": 0.10624084621667862, "rewards/rejected": -0.1778564453125, "step": 9320 }, { "epoch": 0.708562749193089, "grad_norm": 1.8079979296804594, "learning_rate": 8.282265820222593e-07, "log_odds_chosen": 1.52587890625, "log_odds_ratio": -0.3924804627895355, "logits/chosen": -1.327734351158142, "logits/rejected": -1.174218773841858, "logps/chosen": -0.656542956829071, "logps/rejected": -1.7296874523162842, "loss": 0.8293, "nll_loss": 0.7383788824081421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06563720852136612, "rewards/margins": 0.10716553032398224, "rewards/rejected": -0.17277832329273224, "step": 9330 }, { "epoch": 0.7093221947977976, "grad_norm": 2.0420722174470485, "learning_rate": 8.277830871674222e-07, "log_odds_chosen": 1.7628905773162842, "log_odds_ratio": -0.35419923067092896, "logits/chosen": -1.216406226158142, "logits/rejected": -1.021484375, "logps/chosen": -0.6954101324081421, "logps/rejected": -2.0078125, "loss": 0.8399, "nll_loss": 0.7674804925918579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06960449367761612, "rewards/margins": 0.13129882514476776, "rewards/rejected": -0.20085449516773224, "step": 9340 }, { "epoch": 0.7100816404025062, "grad_norm": 1.8741918036670322, "learning_rate": 8.273403039920306e-07, "log_odds_chosen": 1.4140625, "log_odds_ratio": -0.42045897245407104, "logits/chosen": -1.265039086341858, "logits/rejected": -1.0810546875, "logps/chosen": -0.674023449420929, "logps/rejected": -1.6417968273162842, "loss": 0.8603, "nll_loss": 0.8045898675918579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06740722805261612, "rewards/margins": 0.09681396186351776, "rewards/rejected": -0.16423340141773224, "step": 9350 }, { "epoch": 0.7108410860072147, "grad_norm": 2.894565623437091, "learning_rate": 8.268982305947231e-07, "log_odds_chosen": 1.3358886241912842, "log_odds_ratio": -0.41328126192092896, "logits/chosen": -1.2902343273162842, "logits/rejected": -1.0792968273162842, "logps/chosen": -0.680957019329071, "logps/rejected": -1.598242163658142, "loss": 0.8352, "nll_loss": 0.79736328125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06805419921875, "rewards/margins": 0.091644287109375, "rewards/rejected": -0.1597900390625, "step": 9360 }, { "epoch": 0.7116005316119233, "grad_norm": 2.808401024799505, "learning_rate": 8.264568650812423e-07, "log_odds_chosen": 1.6865234375, "log_odds_ratio": -0.4326171875, "logits/chosen": -1.13671875, "logits/rejected": -0.9585937261581421, "logps/chosen": -0.718945324420929, "logps/rejected": -1.970312476158142, "loss": 0.8259, "nll_loss": 0.8060547113418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07194824516773224, "rewards/margins": 0.125, "rewards/rejected": -0.19687500596046448, "step": 9370 }, { "epoch": 0.7123599772166319, "grad_norm": 2.1004913679817845, "learning_rate": 8.26016205564401e-07, "log_odds_chosen": 1.6632812023162842, "log_odds_ratio": -0.3443359434604645, "logits/chosen": -1.280859351158142, "logits/rejected": -1.0857422351837158, "logps/chosen": -0.683789074420929, "logps/rejected": -1.8757812976837158, "loss": 0.8566, "nll_loss": 0.8812500238418579, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06845702975988388, "rewards/margins": 0.11937256157398224, "rewards/rejected": -0.187744140625, "step": 9380 }, { "epoch": 0.7131194228213404, "grad_norm": 1.808447866229622, "learning_rate": 8.25576250164048e-07, "log_odds_chosen": 1.50244140625, "log_odds_ratio": -0.38569337129592896, "logits/chosen": -1.1798827648162842, "logits/rejected": -0.988085925579071, "logps/chosen": -0.704296886920929, "logps/rejected": -1.831640601158142, "loss": 0.8753, "nll_loss": 0.80859375, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.07038573920726776, "rewards/margins": 0.11273803561925888, "rewards/rejected": -0.18303222954273224, "step": 9390 }, { "epoch": 0.713878868426049, "grad_norm": 1.8961987023730806, "learning_rate": 8.251369970070346e-07, "log_odds_chosen": 1.38134765625, "log_odds_ratio": -0.418212890625, "logits/chosen": -1.265039086341858, "logits/rejected": -1.104101538658142, "logps/chosen": -0.6612304449081421, "logps/rejected": -1.6123046875, "loss": 0.8763, "nll_loss": 0.7899414300918579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06613769382238388, "rewards/margins": 0.09506835788488388, "rewards/rejected": -0.16115722060203552, "step": 9400 }, { "epoch": 0.7146383140307575, "grad_norm": 2.1144402525742345, "learning_rate": 8.246984442271813e-07, "log_odds_chosen": 1.555273413658142, "log_odds_ratio": -0.37568360567092896, "logits/chosen": -1.2462890148162842, "logits/rejected": -1.0525391101837158, "logps/chosen": -0.7017577886581421, "logps/rejected": -1.8054687976837158, "loss": 0.8366, "nll_loss": 0.8173828125, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07011719048023224, "rewards/margins": 0.1103515625, "rewards/rejected": -0.18037109076976776, "step": 9410 }, { "epoch": 0.7153977596354661, "grad_norm": 2.297888234698861, "learning_rate": 8.242605899652435e-07, "log_odds_chosen": 1.646093726158142, "log_odds_ratio": -0.37431639432907104, "logits/chosen": -1.253515601158142, "logits/rejected": -1.0857422351837158, "logps/chosen": -0.6641601324081421, "logps/rejected": -1.8699219226837158, "loss": 0.8523, "nll_loss": 0.802734375, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06639404594898224, "rewards/margins": 0.12064208835363388, "rewards/rejected": -0.18693847954273224, "step": 9420 }, { "epoch": 0.7161572052401747, "grad_norm": 1.942727428255868, "learning_rate": 8.238234323688798e-07, "log_odds_chosen": 1.3718750476837158, "log_odds_ratio": -0.43999022245407104, "logits/chosen": -1.1730468273162842, "logits/rejected": -1.0027344226837158, "logps/chosen": -0.7500976324081421, "logps/rejected": -1.7265625, "loss": 0.8325, "nll_loss": 0.852734386920929, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07497558742761612, "rewards/margins": 0.09764404594898224, "rewards/rejected": -0.17250975966453552, "step": 9430 }, { "epoch": 0.7169166508448832, "grad_norm": 1.6759969120283038, "learning_rate": 8.233869695926182e-07, "log_odds_chosen": 1.4997069835662842, "log_odds_ratio": -0.3736816346645355, "logits/chosen": -1.2160155773162842, "logits/rejected": -1.0851562023162842, "logps/chosen": -0.6904296875, "logps/rejected": -1.753515601158142, "loss": 0.8453, "nll_loss": 0.79345703125, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06906738132238388, "rewards/margins": 0.10629882663488388, "rewards/rejected": -0.17536620795726776, "step": 9440 }, { "epoch": 0.7176760964495918, "grad_norm": 2.0082995169182922, "learning_rate": 8.229511997978235e-07, "log_odds_chosen": 1.470703125, "log_odds_ratio": -0.37397462129592896, "logits/chosen": -1.3048827648162842, "logits/rejected": -1.081445336341858, "logps/chosen": -0.701367199420929, "logps/rejected": -1.747656226158142, "loss": 0.8514, "nll_loss": 0.7916015386581421, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07017822563648224, "rewards/margins": 0.10450439155101776, "rewards/rejected": -0.17475585639476776, "step": 9450 }, { "epoch": 0.7184355420543004, "grad_norm": 2.5524663676903736, "learning_rate": 8.22516121152665e-07, "log_odds_chosen": 1.743749976158142, "log_odds_ratio": -0.38530272245407104, "logits/chosen": -1.3142578601837158, "logits/rejected": -1.0978515148162842, "logps/chosen": -0.664355456829071, "logps/rejected": -1.9523437023162842, "loss": 0.8346, "nll_loss": 0.7450195550918579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06641845405101776, "rewards/margins": 0.12875977158546448, "rewards/rejected": -0.19528809189796448, "step": 9460 }, { "epoch": 0.7191949876590089, "grad_norm": 1.9030960718568661, "learning_rate": 8.220817318320836e-07, "log_odds_chosen": 1.5896484851837158, "log_odds_ratio": -0.3680664002895355, "logits/chosen": -1.2443358898162842, "logits/rejected": -1.0576171875, "logps/chosen": -0.6937500238418579, "logps/rejected": -1.826562523841858, "loss": 0.8605, "nll_loss": 0.778027355670929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06939697265625, "rewards/margins": 0.11336670070886612, "rewards/rejected": -0.18271484971046448, "step": 9470 }, { "epoch": 0.7199544332637174, "grad_norm": 2.220152496986314, "learning_rate": 8.216480300177611e-07, "log_odds_chosen": 1.631445288658142, "log_odds_ratio": -0.38432615995407104, "logits/chosen": -1.2208983898162842, "logits/rejected": -1.02734375, "logps/chosen": -0.7137695550918579, "logps/rejected": -1.931249976158142, "loss": 0.8594, "nll_loss": 0.8314453363418579, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07136230170726776, "rewards/margins": 0.12178955227136612, "rewards/rejected": -0.193115234375, "step": 9480 }, { "epoch": 0.7207138788684261, "grad_norm": 3.0971021370399776, "learning_rate": 8.212150138980857e-07, "log_odds_chosen": 1.6320312023162842, "log_odds_ratio": -0.3908935487270355, "logits/chosen": -1.260156273841858, "logits/rejected": -1.051171898841858, "logps/chosen": -0.702929675579071, "logps/rejected": -1.9123046398162842, "loss": 0.8597, "nll_loss": 0.690234363079071, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.0703125, "rewards/margins": 0.12093506008386612, "rewards/rejected": -0.19113770127296448, "step": 9490 }, { "epoch": 0.7214733244731346, "grad_norm": 2.5505151869494127, "learning_rate": 8.207826816681233e-07, "log_odds_chosen": 1.647070288658142, "log_odds_ratio": -0.3738769590854645, "logits/chosen": -1.2296874523162842, "logits/rejected": -1.0458984375, "logps/chosen": -0.676562488079071, "logps/rejected": -1.8914062976837158, "loss": 0.8546, "nll_loss": 0.743457019329071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06761474907398224, "rewards/margins": 0.12148437649011612, "rewards/rejected": -0.18913574516773224, "step": 9500 }, { "epoch": 0.7222327700778431, "grad_norm": 1.954932811200942, "learning_rate": 8.203510315295829e-07, "log_odds_chosen": 1.433203101158142, "log_odds_ratio": -0.40644532442092896, "logits/chosen": -1.216796875, "logits/rejected": -1.0634765625, "logps/chosen": -0.701855480670929, "logps/rejected": -1.718164086341858, "loss": 0.8431, "nll_loss": 0.7955077886581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07020263373851776, "rewards/margins": 0.10169677436351776, "rewards/rejected": -0.17182616889476776, "step": 9510 }, { "epoch": 0.7229922156825518, "grad_norm": 2.09050997664485, "learning_rate": 8.199200616907878e-07, "log_odds_chosen": 1.607421875, "log_odds_ratio": -0.36979979276657104, "logits/chosen": -1.1826171875, "logits/rejected": -1.0294921398162842, "logps/chosen": -0.6771484613418579, "logps/rejected": -1.831640601158142, "loss": 0.8084, "nll_loss": 0.740039050579071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06772460788488388, "rewards/margins": 0.11554565280675888, "rewards/rejected": -0.18332520127296448, "step": 9520 }, { "epoch": 0.7237516612872603, "grad_norm": 1.6410193756113376, "learning_rate": 8.194897703666421e-07, "log_odds_chosen": 1.4861328601837158, "log_odds_ratio": -0.42158204317092896, "logits/chosen": -1.263085961341858, "logits/rejected": -1.1320312023162842, "logps/chosen": -0.708789050579071, "logps/rejected": -1.7814452648162842, "loss": 0.8475, "nll_loss": 0.822070300579071, "rewards/accuracies": 0.75, "rewards/chosen": -0.07086181640625, "rewards/margins": 0.107330322265625, "rewards/rejected": -0.17814941704273224, "step": 9530 }, { "epoch": 0.7245111068919688, "grad_norm": 2.4413643185420106, "learning_rate": 8.190601557786015e-07, "log_odds_chosen": 1.4402344226837158, "log_odds_ratio": -0.42695313692092896, "logits/chosen": -1.141015648841858, "logits/rejected": -0.983593761920929, "logps/chosen": -0.7427734136581421, "logps/rejected": -1.8009765148162842, "loss": 0.832, "nll_loss": 0.8832031488418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07430420070886612, "rewards/margins": 0.10582427680492401, "rewards/rejected": -0.18012695014476776, "step": 9540 }, { "epoch": 0.7252705524966774, "grad_norm": 2.4690692646824823, "learning_rate": 8.186312161546413e-07, "log_odds_chosen": 1.6965820789337158, "log_odds_ratio": -0.34663087129592896, "logits/chosen": -1.2830078601837158, "logits/rejected": -1.0421874523162842, "logps/chosen": -0.6318359375, "logps/rejected": -1.8271484375, "loss": 0.8437, "nll_loss": 0.7724609375, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06314697116613388, "rewards/margins": 0.11960449069738388, "rewards/rejected": -0.182861328125, "step": 9550 }, { "epoch": 0.726029998101386, "grad_norm": 1.6291608095383956, "learning_rate": 8.182029497292262e-07, "log_odds_chosen": 1.480371117591858, "log_odds_ratio": -0.4151855409145355, "logits/chosen": -1.220312476158142, "logits/rejected": -1.052734375, "logps/chosen": -0.7032226324081421, "logps/rejected": -1.7734375, "loss": 0.8297, "nll_loss": 0.8101562261581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07027588039636612, "rewards/margins": 0.10720214992761612, "rewards/rejected": -0.17746582627296448, "step": 9560 }, { "epoch": 0.7267894437060946, "grad_norm": 1.9465917165278175, "learning_rate": 8.177753547432792e-07, "log_odds_chosen": 1.3966796398162842, "log_odds_ratio": -0.412353515625, "logits/chosen": -1.2677733898162842, "logits/rejected": -1.125390648841858, "logps/chosen": -0.6690429449081421, "logps/rejected": -1.6417968273162842, "loss": 0.8644, "nll_loss": 0.8470703363418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06693115085363388, "rewards/margins": 0.09718017280101776, "rewards/rejected": -0.16416016221046448, "step": 9570 }, { "epoch": 0.7275488893108031, "grad_norm": 2.2227459413640824, "learning_rate": 8.173484294441524e-07, "log_odds_chosen": 1.266015648841858, "log_odds_ratio": -0.4805664122104645, "logits/chosen": -1.1728515625, "logits/rejected": -1.0558593273162842, "logps/chosen": -0.6832031011581421, "logps/rejected": -1.6025390625, "loss": 0.8363, "nll_loss": 0.7822265625, "rewards/accuracies": 0.71875, "rewards/chosen": -0.06833495944738388, "rewards/margins": 0.09189452975988388, "rewards/rejected": -0.16032715141773224, "step": 9580 }, { "epoch": 0.7283083349155117, "grad_norm": 2.048564119568155, "learning_rate": 8.169221720855952e-07, "log_odds_chosen": 1.8904297351837158, "log_odds_ratio": -0.3573974668979645, "logits/chosen": -1.248046875, "logits/rejected": -1.062890648841858, "logps/chosen": -0.70751953125, "logps/rejected": -2.1109375953674316, "loss": 0.8725, "nll_loss": 0.839160144329071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07069091498851776, "rewards/margins": 0.14018554985523224, "rewards/rejected": -0.21088866889476776, "step": 9590 }, { "epoch": 0.7290677805202203, "grad_norm": 1.78590456756995, "learning_rate": 8.164965809277261e-07, "log_odds_chosen": 1.5006103515625, "log_odds_ratio": -0.40336912870407104, "logits/chosen": -1.2306640148162842, "logits/rejected": -1.0849609375, "logps/chosen": -0.716601550579071, "logps/rejected": -1.845312476158142, "loss": 0.8474, "nll_loss": 0.7999023199081421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07163085788488388, "rewards/margins": 0.112706758081913, "rewards/rejected": -0.18439941108226776, "step": 9600 }, { "epoch": 0.7298272261249288, "grad_norm": 2.083643672724203, "learning_rate": 8.160716542370011e-07, "log_odds_chosen": 1.429931640625, "log_odds_ratio": -0.43657225370407104, "logits/chosen": -1.251562476158142, "logits/rejected": -1.080468773841858, "logps/chosen": -0.7152343988418579, "logps/rejected": -1.770898461341858, "loss": 0.8415, "nll_loss": 0.7470703125, "rewards/accuracies": 0.75, "rewards/chosen": -0.071533203125, "rewards/margins": 0.10551681369543076, "rewards/rejected": -0.17719726264476776, "step": 9610 }, { "epoch": 0.7305866717296373, "grad_norm": 2.0325705149318325, "learning_rate": 8.156473902861856e-07, "log_odds_chosen": 1.58984375, "log_odds_ratio": -0.4124511778354645, "logits/chosen": -1.2863280773162842, "logits/rejected": -1.082421898841858, "logps/chosen": -0.6353515386581421, "logps/rejected": -1.778906226158142, "loss": 0.8355, "nll_loss": 0.78564453125, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06361083686351776, "rewards/margins": 0.11440429836511612, "rewards/rejected": -0.1778564453125, "step": 9620 }, { "epoch": 0.731346117334346, "grad_norm": 2.195711949963783, "learning_rate": 8.152237873543241e-07, "log_odds_chosen": 1.3994140625, "log_odds_ratio": -0.426025390625, "logits/chosen": -1.335546851158142, "logits/rejected": -1.124414086341858, "logps/chosen": -0.6727539300918579, "logps/rejected": -1.6921875476837158, "loss": 0.8231, "nll_loss": 0.7850586175918579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.06724853813648224, "rewards/margins": 0.10197754204273224, "rewards/rejected": -0.16921386122703552, "step": 9630 }, { "epoch": 0.7321055629390545, "grad_norm": 2.4181616172046523, "learning_rate": 8.148008437267104e-07, "log_odds_chosen": 1.4499022960662842, "log_odds_ratio": -0.43120115995407104, "logits/chosen": -1.32421875, "logits/rejected": -1.1328125, "logps/chosen": -0.708203136920929, "logps/rejected": -1.7734375, "loss": 0.8754, "nll_loss": 0.792187511920929, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07080078125, "rewards/margins": 0.10653991997241974, "rewards/rejected": -0.17744140326976776, "step": 9640 }, { "epoch": 0.732865008543763, "grad_norm": 1.6334678550372603, "learning_rate": 8.143785576948602e-07, "log_odds_chosen": 1.6615111827850342, "log_odds_ratio": -0.3721679747104645, "logits/chosen": -1.2371094226837158, "logits/rejected": -1.0537109375, "logps/chosen": -0.690722644329071, "logps/rejected": -1.8701171875, "loss": 0.8296, "nll_loss": 0.8109375238418579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06903076171875, "rewards/margins": 0.11800689995288849, "rewards/rejected": -0.187255859375, "step": 9650 }, { "epoch": 0.7336244541484717, "grad_norm": 2.080356986547261, "learning_rate": 8.139569275564796e-07, "log_odds_chosen": 1.5515625476837158, "log_odds_ratio": -0.37470704317092896, "logits/chosen": -1.228906273841858, "logits/rejected": -1.0056641101837158, "logps/chosen": -0.7134765386581421, "logps/rejected": -1.8351562023162842, "loss": 0.8406, "nll_loss": 0.8490234613418579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07136230170726776, "rewards/margins": 0.11210937798023224, "rewards/rejected": -0.1834716796875, "step": 9660 }, { "epoch": 0.7343838997531802, "grad_norm": 2.382479344970819, "learning_rate": 8.135359516154388e-07, "log_odds_chosen": 1.400476098060608, "log_odds_ratio": -0.4380859434604645, "logits/chosen": -1.187109351158142, "logits/rejected": -1.0837891101837158, "logps/chosen": -0.7222656011581421, "logps/rejected": -1.731835961341858, "loss": 0.8474, "nll_loss": 0.8392578363418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07221679389476776, "rewards/margins": 0.10102234035730362, "rewards/rejected": -0.17333984375, "step": 9670 }, { "epoch": 0.7351433453578887, "grad_norm": 1.865399956282584, "learning_rate": 8.131156281817418e-07, "log_odds_chosen": 1.4783203601837158, "log_odds_ratio": -0.42778319120407104, "logits/chosen": -1.1876952648162842, "logits/rejected": -1.0232422351837158, "logps/chosen": -0.726367175579071, "logps/rejected": -1.779687523841858, "loss": 0.8362, "nll_loss": 0.79296875, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07257080078125, "rewards/margins": 0.10542144626379013, "rewards/rejected": -0.17807617783546448, "step": 9680 }, { "epoch": 0.7359027909625973, "grad_norm": 1.7157906620318233, "learning_rate": 8.126959555714979e-07, "log_odds_chosen": 1.818359375, "log_odds_ratio": -0.3334716856479645, "logits/chosen": -1.2576172351837158, "logits/rejected": -1.0271484851837158, "logps/chosen": -0.670703113079071, "logps/rejected": -2.010546922683716, "loss": 0.8335, "nll_loss": 0.832226574420929, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06702880561351776, "rewards/margins": 0.13400879502296448, "rewards/rejected": -0.20097656548023224, "step": 9690 }, { "epoch": 0.7366622365673059, "grad_norm": 1.6095790471021794, "learning_rate": 8.122769321068952e-07, "log_odds_chosen": 1.8251953125, "log_odds_ratio": -0.358642578125, "logits/chosen": -1.2087891101837158, "logits/rejected": -1.0732421875, "logps/chosen": -0.627734363079071, "logps/rejected": -1.933203101158142, "loss": 0.824, "nll_loss": 0.7876952886581421, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06273193657398224, "rewards/margins": 0.13060303032398224, "rewards/rejected": -0.193359375, "step": 9700 }, { "epoch": 0.7374216821720144, "grad_norm": 1.8435380274452864, "learning_rate": 8.118585561161698e-07, "log_odds_chosen": 1.5578124523162842, "log_odds_ratio": -0.4232421815395355, "logits/chosen": -1.259374976158142, "logits/rejected": -1.0480468273162842, "logps/chosen": -0.720996081829071, "logps/rejected": -1.857421875, "loss": 0.8562, "nll_loss": 0.797167956829071, "rewards/accuracies": 0.75, "rewards/chosen": -0.07209472358226776, "rewards/margins": 0.11363525688648224, "rewards/rejected": -0.18583984673023224, "step": 9710 }, { "epoch": 0.738181127776723, "grad_norm": 1.9490558181212343, "learning_rate": 8.114408259335793e-07, "log_odds_chosen": 1.5732421875, "log_odds_ratio": -0.38728028535842896, "logits/chosen": -1.1433594226837158, "logits/rejected": -1.0187499523162842, "logps/chosen": -0.6431640386581421, "logps/rejected": -1.7912108898162842, "loss": 0.8283, "nll_loss": 0.855273425579071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06431885063648224, "rewards/margins": 0.11466064304113388, "rewards/rejected": -0.1790771484375, "step": 9720 }, { "epoch": 0.7389405733814316, "grad_norm": 2.329785461170994, "learning_rate": 8.110237398993754e-07, "log_odds_chosen": 1.538183569908142, "log_odds_ratio": -0.3902343809604645, "logits/chosen": -1.1904296875, "logits/rejected": -0.982617199420929, "logps/chosen": -0.704882800579071, "logps/rejected": -1.806249976158142, "loss": 0.8109, "nll_loss": 0.743457019329071, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07044677436351776, "rewards/margins": 0.11017455905675888, "rewards/rejected": -0.18059082329273224, "step": 9730 }, { "epoch": 0.7397000189861401, "grad_norm": 1.943787670992818, "learning_rate": 8.106072963597751e-07, "log_odds_chosen": 1.6134765148162842, "log_odds_ratio": -0.38749998807907104, "logits/chosen": -1.2917969226837158, "logits/rejected": -1.0556640625, "logps/chosen": -0.6348632574081421, "logps/rejected": -1.7980468273162842, "loss": 0.8306, "nll_loss": 0.8041015863418579, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06352539360523224, "rewards/margins": 0.11639404296875, "rewards/rejected": -0.17990723252296448, "step": 9740 }, { "epoch": 0.7404594645908487, "grad_norm": 1.6841571256146914, "learning_rate": 8.101914936669332e-07, "log_odds_chosen": 1.349267601966858, "log_odds_ratio": -0.4476562440395355, "logits/chosen": -1.201757788658142, "logits/rejected": -1.0849609375, "logps/chosen": -0.7279297113418579, "logps/rejected": -1.7041015625, "loss": 0.8263, "nll_loss": 0.808886706829071, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07275390625, "rewards/margins": 0.097503662109375, "rewards/rejected": -0.17026367783546448, "step": 9750 }, { "epoch": 0.7412189101955572, "grad_norm": 1.862978018506566, "learning_rate": 8.09776330178916e-07, "log_odds_chosen": 1.622900366783142, "log_odds_ratio": -0.3663574159145355, "logits/chosen": -1.173437476158142, "logits/rejected": -1.006445288658142, "logps/chosen": -0.6578124761581421, "logps/rejected": -1.8156249523162842, "loss": 0.8463, "nll_loss": 0.7801758050918579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06578369438648224, "rewards/margins": 0.11580200493335724, "rewards/rejected": -0.18168945610523224, "step": 9760 }, { "epoch": 0.7419783558002658, "grad_norm": 1.956791779895198, "learning_rate": 8.093618042596727e-07, "log_odds_chosen": 1.548828125, "log_odds_ratio": -0.3756347596645355, "logits/chosen": -1.2654297351837158, "logits/rejected": -1.1013672351837158, "logps/chosen": -0.66162109375, "logps/rejected": -1.8064453601837158, "loss": 0.8371, "nll_loss": 0.8021484613418579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06617431342601776, "rewards/margins": 0.11455688625574112, "rewards/rejected": -0.18081054091453552, "step": 9770 }, { "epoch": 0.7427378014049744, "grad_norm": 1.7134929731161561, "learning_rate": 8.089479142790095e-07, "log_odds_chosen": 1.3888671398162842, "log_odds_ratio": -0.43183594942092896, "logits/chosen": -1.253515601158142, "logits/rejected": -1.0271484851837158, "logps/chosen": -0.6830078363418579, "logps/rejected": -1.6912109851837158, "loss": 0.8311, "nll_loss": 0.792675793170929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06826172024011612, "rewards/margins": 0.10081787407398224, "rewards/rejected": -0.1690673828125, "step": 9780 }, { "epoch": 0.7434972470096829, "grad_norm": 1.7917107547392905, "learning_rate": 8.085346586125621e-07, "log_odds_chosen": 1.857812523841858, "log_odds_ratio": -0.34833985567092896, "logits/chosen": -1.2744140625, "logits/rejected": -1.0087890625, "logps/chosen": -0.67236328125, "logps/rejected": -2.01171875, "loss": 0.812, "nll_loss": 0.7865234613418579, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0672607421875, "rewards/margins": 0.13392333686351776, "rewards/rejected": -0.20114746689796448, "step": 9790 }, { "epoch": 0.7442566926143915, "grad_norm": 1.7173046676060162, "learning_rate": 8.081220356417685e-07, "log_odds_chosen": 1.837499976158142, "log_odds_ratio": -0.39228516817092896, "logits/chosen": -1.1443359851837158, "logits/rejected": -1.01953125, "logps/chosen": -0.704296886920929, "logps/rejected": -2.09765625, "loss": 0.8518, "nll_loss": 0.8223632574081421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07048340141773224, "rewards/margins": 0.13919678330421448, "rewards/rejected": -0.20979003608226776, "step": 9800 }, { "epoch": 0.7450161382191001, "grad_norm": 1.9011837591881142, "learning_rate": 8.077100437538435e-07, "log_odds_chosen": 1.423437476158142, "log_odds_ratio": -0.4295410215854645, "logits/chosen": -1.2607421875, "logits/rejected": -1.0910155773162842, "logps/chosen": -0.764453113079071, "logps/rejected": -1.817968726158142, "loss": 0.8331, "nll_loss": 0.9078124761581421, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07635498046875, "rewards/margins": 0.10543213039636612, "rewards/rejected": -0.1817626953125, "step": 9810 }, { "epoch": 0.7457755838238086, "grad_norm": 1.9392115209449918, "learning_rate": 8.072986813417512e-07, "log_odds_chosen": 1.684667944908142, "log_odds_ratio": -0.34477537870407104, "logits/chosen": -1.308007836341858, "logits/rejected": -1.1005859375, "logps/chosen": -0.6668945550918579, "logps/rejected": -1.863671898841858, "loss": 0.8208, "nll_loss": 0.75244140625, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06671142578125, "rewards/margins": 0.11983032524585724, "rewards/rejected": -0.18635253608226776, "step": 9820 }, { "epoch": 0.7465350294285171, "grad_norm": 1.7860975119775744, "learning_rate": 8.068879468041791e-07, "log_odds_chosen": 1.5818359851837158, "log_odds_ratio": -0.37358397245407104, "logits/chosen": -1.1638672351837158, "logits/rejected": -0.961132824420929, "logps/chosen": -0.6631835699081421, "logps/rejected": -1.75390625, "loss": 0.8176, "nll_loss": 0.7835937738418579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06625976413488388, "rewards/margins": 0.10906982421875, "rewards/rejected": -0.17543944716453552, "step": 9830 }, { "epoch": 0.7472944750332258, "grad_norm": 1.795547379016665, "learning_rate": 8.064778385455118e-07, "log_odds_chosen": 1.5850830078125, "log_odds_ratio": -0.40351563692092896, "logits/chosen": -1.302343726158142, "logits/rejected": -1.064843773841858, "logps/chosen": -0.614453136920929, "logps/rejected": -1.7490234375, "loss": 0.8209, "nll_loss": 0.74609375, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06141357496380806, "rewards/margins": 0.11345215141773224, "rewards/rejected": -0.17499999701976776, "step": 9840 }, { "epoch": 0.7480539206379343, "grad_norm": 1.5399279483043273, "learning_rate": 8.060683549758054e-07, "log_odds_chosen": 1.500390648841858, "log_odds_ratio": -0.4170898497104645, "logits/chosen": -1.256445288658142, "logits/rejected": -1.0859375, "logps/chosen": -0.6703125238418579, "logps/rejected": -1.732812523841858, "loss": 0.8157, "nll_loss": 0.81640625, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06706543266773224, "rewards/margins": 0.10620727390050888, "rewards/rejected": -0.17324218153953552, "step": 9850 }, { "epoch": 0.7488133662426428, "grad_norm": 2.156702773533916, "learning_rate": 8.056594945107608e-07, "log_odds_chosen": 1.5310547351837158, "log_odds_ratio": -0.4286132752895355, "logits/chosen": -1.1535155773162842, "logits/rejected": -1.068750023841858, "logps/chosen": -0.722363293170929, "logps/rejected": -1.830078125, "loss": 0.8007, "nll_loss": 0.736523449420929, "rewards/accuracies": 0.75, "rewards/chosen": -0.07218017429113388, "rewards/margins": 0.110748291015625, "rewards/rejected": -0.18295899033546448, "step": 9860 }, { "epoch": 0.7495728118473515, "grad_norm": 1.9647520573738568, "learning_rate": 8.052512555716987e-07, "log_odds_chosen": 1.608007788658142, "log_odds_ratio": -0.384521484375, "logits/chosen": -1.3603515625, "logits/rejected": -1.09765625, "logps/chosen": -0.6880859136581421, "logps/rejected": -1.857421875, "loss": 0.8345, "nll_loss": 0.767285168170929, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06884765625, "rewards/margins": 0.11683349311351776, "rewards/rejected": -0.18581542372703552, "step": 9870 }, { "epoch": 0.75033225745206, "grad_norm": 2.2839777844928935, "learning_rate": 8.048436365855337e-07, "log_odds_chosen": 1.6731445789337158, "log_odds_ratio": -0.39794921875, "logits/chosen": -1.256250023841858, "logits/rejected": -1.0798828601837158, "logps/chosen": -0.693554699420929, "logps/rejected": -1.912695288658142, "loss": 0.8188, "nll_loss": 0.779296875, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06937255710363388, "rewards/margins": 0.121795654296875, "rewards/rejected": -0.1912841796875, "step": 9880 }, { "epoch": 0.7510917030567685, "grad_norm": 2.248953787331925, "learning_rate": 8.044366359847486e-07, "log_odds_chosen": 1.5715820789337158, "log_odds_ratio": -0.4205078184604645, "logits/chosen": -1.2595703601837158, "logits/rejected": -1.1017577648162842, "logps/chosen": -0.6861327886581421, "logps/rejected": -1.841796875, "loss": 0.8388, "nll_loss": 0.8003906011581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06865234673023224, "rewards/margins": 0.11572265625, "rewards/rejected": -0.184326171875, "step": 9890 }, { "epoch": 0.7518511486614772, "grad_norm": 3.1336155455285692, "learning_rate": 8.040302522073696e-07, "log_odds_chosen": 1.6300780773162842, "log_odds_ratio": -0.442138671875, "logits/chosen": -1.1857421398162842, "logits/rejected": -1.0556640625, "logps/chosen": -0.7427734136581421, "logps/rejected": -2.0072264671325684, "loss": 0.8388, "nll_loss": 0.817578136920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07424316555261612, "rewards/margins": 0.12659911811351776, "rewards/rejected": -0.20078125596046448, "step": 9900 }, { "epoch": 0.7526105942661857, "grad_norm": 1.8227300582771109, "learning_rate": 8.036244836969407e-07, "log_odds_chosen": 1.501074194908142, "log_odds_ratio": -0.4056152403354645, "logits/chosen": -1.3054687976837158, "logits/rejected": -1.0888671875, "logps/chosen": -0.7142578363418579, "logps/rejected": -1.782812476158142, "loss": 0.8536, "nll_loss": 0.774121105670929, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07135009765625, "rewards/margins": 0.10687255859375, "rewards/rejected": -0.1783447265625, "step": 9910 }, { "epoch": 0.7533700398708942, "grad_norm": 1.9138977769926597, "learning_rate": 8.032193289024989e-07, "log_odds_chosen": 1.7023437023162842, "log_odds_ratio": -0.35346680879592896, "logits/chosen": -1.3664062023162842, "logits/rejected": -1.128320336341858, "logps/chosen": -0.7105468511581421, "logps/rejected": -1.957421898841858, "loss": 0.7974, "nll_loss": 0.816210925579071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07108154147863388, "rewards/margins": 0.12482909858226776, "rewards/rejected": -0.19584961235523224, "step": 9920 }, { "epoch": 0.7541294854756028, "grad_norm": 1.7790474850201958, "learning_rate": 8.02814786278549e-07, "log_odds_chosen": 1.7423827648162842, "log_odds_ratio": -0.41552734375, "logits/chosen": -1.267187476158142, "logits/rejected": -1.035742163658142, "logps/chosen": -0.7113281488418579, "logps/rejected": -2.020312547683716, "loss": 0.8396, "nll_loss": 0.8062499761581421, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07111816108226776, "rewards/margins": 0.13092041015625, "rewards/rejected": -0.20195312798023224, "step": 9930 }, { "epoch": 0.7548889310803114, "grad_norm": 1.8523899389175844, "learning_rate": 8.024108542850394e-07, "log_odds_chosen": 1.765722632408142, "log_odds_ratio": -0.3970703184604645, "logits/chosen": -1.235937476158142, "logits/rejected": -1.078710913658142, "logps/chosen": -0.7154296636581421, "logps/rejected": -2.083984375, "loss": 0.8062, "nll_loss": 0.7822265625, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07150878757238388, "rewards/margins": 0.13680419325828552, "rewards/rejected": -0.208251953125, "step": 9940 }, { "epoch": 0.7556483766850199, "grad_norm": 1.8725924159711422, "learning_rate": 8.020075313873367e-07, "log_odds_chosen": 1.462988257408142, "log_odds_ratio": -0.4608398377895355, "logits/chosen": -1.26171875, "logits/rejected": -1.135156273841858, "logps/chosen": -0.7294921875, "logps/rejected": -1.814062476158142, "loss": 0.8422, "nll_loss": 0.789257824420929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07291259616613388, "rewards/margins": 0.1085205078125, "rewards/rejected": -0.18146972358226776, "step": 9950 }, { "epoch": 0.7564078222897285, "grad_norm": 1.8583518194278383, "learning_rate": 8.016048160562023e-07, "log_odds_chosen": 1.4685547351837158, "log_odds_ratio": -0.4063476622104645, "logits/chosen": -1.2277343273162842, "logits/rejected": -1.021875023841858, "logps/chosen": -0.7220703363418579, "logps/rejected": -1.822656273841858, "loss": 0.8358, "nll_loss": 0.8597656488418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07222900539636612, "rewards/margins": 0.110107421875, "rewards/rejected": -0.18232421576976776, "step": 9960 }, { "epoch": 0.7571672678944371, "grad_norm": 1.7489758876175683, "learning_rate": 8.012027067677667e-07, "log_odds_chosen": 1.4907715320587158, "log_odds_ratio": -0.4061523377895355, "logits/chosen": -1.2140624523162842, "logits/rejected": -1.087499976158142, "logps/chosen": -0.665722668170929, "logps/rejected": -1.6736328601837158, "loss": 0.8418, "nll_loss": 0.7884765863418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06657715141773224, "rewards/margins": 0.10090027004480362, "rewards/rejected": -0.16743163764476776, "step": 9970 }, { "epoch": 0.7579267134991456, "grad_norm": 1.802065546939526, "learning_rate": 8.008012020035062e-07, "log_odds_chosen": 1.625634789466858, "log_odds_ratio": -0.38520509004592896, "logits/chosen": -1.2783203125, "logits/rejected": -1.041015625, "logps/chosen": -0.712695300579071, "logps/rejected": -1.9140625, "loss": 0.8388, "nll_loss": 0.8550781011581421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07122802734375, "rewards/margins": 0.11989136040210724, "rewards/rejected": -0.19143065810203552, "step": 9980 }, { "epoch": 0.7586861591038542, "grad_norm": 2.2371996102058156, "learning_rate": 8.004003002502188e-07, "log_odds_chosen": 1.6330077648162842, "log_odds_ratio": -0.40888673067092896, "logits/chosen": -1.1765625476837158, "logits/rejected": -0.9906250238418579, "logps/chosen": -0.6919921636581421, "logps/rejected": -1.8800780773162842, "loss": 0.8421, "nll_loss": 0.8638671636581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06917724758386612, "rewards/margins": 0.11884765326976776, "rewards/rejected": -0.18798828125, "step": 9990 }, { "epoch": 0.7594456047085627, "grad_norm": 16.224683042103, "learning_rate": 8e-07, "log_odds_chosen": 1.6964843273162842, "log_odds_ratio": -0.38249510526657104, "logits/chosen": -1.2009766101837158, "logits/rejected": -1.0441405773162842, "logps/chosen": -0.7347656488418579, "logps/rejected": -1.970703125, "loss": 0.8299, "nll_loss": 0.8218749761581421, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07351074367761612, "rewards/margins": 0.12357177585363388, "rewards/rejected": -0.1971435546875, "step": 10000 }, { "epoch": 0.7594456047085627, "eval_log_odds_chosen": 1.168565034866333, "eval_log_odds_ratio": -0.4858035743236542, "eval_logits/chosen": -1.196563959121704, "eval_logits/rejected": -1.0272332429885864, "eval_logps/chosen": -0.793671727180481, "eval_logps/rejected": -1.666948914527893, "eval_loss": 1.2217642068862915, "eval_nll_loss": 1.1777170896530151, "eval_rewards/accuracies": 0.7296352982521057, "eval_rewards/chosen": -0.07937466353178024, "eval_rewards/margins": 0.08731885254383087, "eval_rewards/rejected": -0.1666969507932663, "eval_runtime": 1676.3037, "eval_samples_per_second": 55.985, "eval_steps_per_second": 0.875, "step": 10000 }, { "epoch": 0.7602050503132713, "grad_norm": 2.305443998125007, "learning_rate": 7.996002997502185e-07, "log_odds_chosen": 1.6630859375, "log_odds_ratio": -0.3716064393520355, "logits/chosen": -1.295312523841858, "logits/rejected": -1.0798828601837158, "logps/chosen": -0.6888672113418579, "logps/rejected": -1.9013671875, "loss": 0.841, "nll_loss": 0.805859386920929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06884765625, "rewards/margins": 0.12114868313074112, "rewards/rejected": -0.19016113877296448, "step": 10010 }, { "epoch": 0.7609644959179799, "grad_norm": 1.956526083331964, "learning_rate": 7.992011980034937e-07, "log_odds_chosen": 1.507421851158142, "log_odds_ratio": -0.370361328125, "logits/chosen": -1.2958984375, "logits/rejected": -1.082617163658142, "logps/chosen": -0.6587890386581421, "logps/rejected": -1.738671898841858, "loss": 0.8195, "nll_loss": 0.7685546875, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06589355319738388, "rewards/margins": 0.10789795219898224, "rewards/rejected": -0.17380371689796448, "step": 10020 }, { "epoch": 0.7617239415226884, "grad_norm": 1.7121948135981586, "learning_rate": 7.98802693267671e-07, "log_odds_chosen": 1.463281273841858, "log_odds_ratio": -0.40168458223342896, "logits/chosen": -1.227148413658142, "logits/rejected": -0.995312511920929, "logps/chosen": -0.6788085699081421, "logps/rejected": -1.714453101158142, "loss": 0.8273, "nll_loss": 0.8143554925918579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06788329780101776, "rewards/margins": 0.10355834662914276, "rewards/rejected": -0.17148438096046448, "step": 10030 }, { "epoch": 0.762483387127397, "grad_norm": 2.3012393633628956, "learning_rate": 7.984047840557992e-07, "log_odds_chosen": 1.387109398841858, "log_odds_ratio": -0.42744141817092896, "logits/chosen": -1.17578125, "logits/rejected": -1.0476562976837158, "logps/chosen": -0.706250011920929, "logps/rejected": -1.6884765625, "loss": 0.8474, "nll_loss": 0.8994140625, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07062987983226776, "rewards/margins": 0.0983428955078125, "rewards/rejected": -0.16896972060203552, "step": 10040 }, { "epoch": 0.7632428327321056, "grad_norm": 1.699932748293435, "learning_rate": 7.980074688861063e-07, "log_odds_chosen": 1.477148413658142, "log_odds_ratio": -0.393798828125, "logits/chosen": -1.3113281726837158, "logits/rejected": -1.0978515148162842, "logps/chosen": -0.6546875238418579, "logps/rejected": -1.691796898841858, "loss": 0.8242, "nll_loss": 0.746874988079071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.0654296875, "rewards/margins": 0.10365600883960724, "rewards/rejected": -0.16911621391773224, "step": 10050 }, { "epoch": 0.7640022783368141, "grad_norm": 2.5512743396894195, "learning_rate": 7.976107462819775e-07, "log_odds_chosen": 1.800195336341858, "log_odds_ratio": -0.3418212831020355, "logits/chosen": -1.263671875, "logits/rejected": -1.0730469226837158, "logps/chosen": -0.625292956829071, "logps/rejected": -1.905664086341858, "loss": 0.8571, "nll_loss": 0.7699218988418579, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06256103515625, "rewards/margins": 0.12816771864891052, "rewards/rejected": -0.1905517578125, "step": 10060 }, { "epoch": 0.7647617239415226, "grad_norm": 2.054176427797956, "learning_rate": 7.97214614771931e-07, "log_odds_chosen": 1.640234351158142, "log_odds_ratio": -0.37373048067092896, "logits/chosen": -1.235937476158142, "logits/rejected": -1.0457031726837158, "logps/chosen": -0.710644543170929, "logps/rejected": -1.883203148841858, "loss": 0.8161, "nll_loss": 0.7530273199081421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.071044921875, "rewards/margins": 0.11729736626148224, "rewards/rejected": -0.18842773139476776, "step": 10070 }, { "epoch": 0.7655211695462313, "grad_norm": 1.8259165438258183, "learning_rate": 7.968190728895957e-07, "log_odds_chosen": 1.551965355873108, "log_odds_ratio": -0.3768554627895355, "logits/chosen": -1.211523413658142, "logits/rejected": -1.0710937976837158, "logps/chosen": -0.681835949420929, "logps/rejected": -1.817773461341858, "loss": 0.8283, "nll_loss": 0.808789074420929, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06820068508386612, "rewards/margins": 0.11352996528148651, "rewards/rejected": -0.18183593451976776, "step": 10080 }, { "epoch": 0.7662806151509398, "grad_norm": 2.6682669538271444, "learning_rate": 7.964241191736886e-07, "log_odds_chosen": 1.46575927734375, "log_odds_ratio": -0.4251953065395355, "logits/chosen": -1.2734375, "logits/rejected": -1.0929687023162842, "logps/chosen": -0.68701171875, "logps/rejected": -1.7488281726837158, "loss": 0.8243, "nll_loss": 0.786425769329071, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06870117038488388, "rewards/margins": 0.10630645602941513, "rewards/rejected": -0.17491455376148224, "step": 10090 }, { "epoch": 0.7670400607556483, "grad_norm": 2.0234546818197225, "learning_rate": 7.960297521679913e-07, "log_odds_chosen": 1.6279296875, "log_odds_ratio": -0.39960938692092896, "logits/chosen": -1.1359374523162842, "logits/rejected": -1.0539062023162842, "logps/chosen": -0.7730468511581421, "logps/rejected": -2.0130858421325684, "loss": 0.853, "nll_loss": 0.830273449420929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07730712741613388, "rewards/margins": 0.12397155910730362, "rewards/rejected": -0.20126953721046448, "step": 10100 }, { "epoch": 0.767799506360357, "grad_norm": 1.5781836159538365, "learning_rate": 7.956359704213283e-07, "log_odds_chosen": 1.6222655773162842, "log_odds_ratio": -0.37128907442092896, "logits/chosen": -1.305273413658142, "logits/rejected": -1.0869140625, "logps/chosen": -0.664355456829071, "logps/rejected": -1.801171898841858, "loss": 0.8287, "nll_loss": 0.80322265625, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06643066555261612, "rewards/margins": 0.11368408054113388, "rewards/rejected": -0.1802978515625, "step": 10110 }, { "epoch": 0.7685589519650655, "grad_norm": 2.003707072969664, "learning_rate": 7.95242772487544e-07, "log_odds_chosen": 1.5603516101837158, "log_odds_ratio": -0.3994384706020355, "logits/chosen": -1.300195336341858, "logits/rejected": -1.1404297351837158, "logps/chosen": -0.667285144329071, "logps/rejected": -1.725195288658142, "loss": 0.8141, "nll_loss": 0.7529296875, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.0667724609375, "rewards/margins": 0.10565185546875, "rewards/rejected": -0.17246094346046448, "step": 10120 }, { "epoch": 0.769318397569774, "grad_norm": 1.9566951063199327, "learning_rate": 7.94850156925481e-07, "log_odds_chosen": 1.489599585533142, "log_odds_ratio": -0.3837890625, "logits/chosen": -1.186132788658142, "logits/rejected": -1.047265648841858, "logps/chosen": -0.633593738079071, "logps/rejected": -1.692773461341858, "loss": 0.8192, "nll_loss": 0.7860351800918579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06340332329273224, "rewards/margins": 0.10585937649011612, "rewards/rejected": -0.16938476264476776, "step": 10130 }, { "epoch": 0.7700778431744826, "grad_norm": 1.928634482506982, "learning_rate": 7.944581222989574e-07, "log_odds_chosen": 1.65478515625, "log_odds_ratio": -0.37324219942092896, "logits/chosen": -1.1658203601837158, "logits/rejected": -1.01953125, "logps/chosen": -0.664843738079071, "logps/rejected": -1.890234351158142, "loss": 0.8286, "nll_loss": 0.7353515625, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06640625, "rewards/margins": 0.12242431938648224, "rewards/rejected": -0.18886718153953552, "step": 10140 }, { "epoch": 0.7708372887791912, "grad_norm": 1.7109049959037, "learning_rate": 7.940666671767441e-07, "log_odds_chosen": 1.5915038585662842, "log_odds_ratio": -0.42705076932907104, "logits/chosen": -1.337499976158142, "logits/rejected": -1.141992211341858, "logps/chosen": -0.712207019329071, "logps/rejected": -1.8923828601837158, "loss": 0.8295, "nll_loss": 0.7919921875, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07126464694738388, "rewards/margins": 0.11812744289636612, "rewards/rejected": -0.18930664658546448, "step": 10150 }, { "epoch": 0.7715967343838998, "grad_norm": 1.9866677681935845, "learning_rate": 7.936757901325451e-07, "log_odds_chosen": 1.5958983898162842, "log_odds_ratio": -0.37031251192092896, "logits/chosen": -1.232421875, "logits/rejected": -1.0849609375, "logps/chosen": -0.661425769329071, "logps/rejected": -1.8093750476837158, "loss": 0.8254, "nll_loss": 0.7857421636581421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06612548977136612, "rewards/margins": 0.11492309719324112, "rewards/rejected": -0.18095703423023224, "step": 10160 }, { "epoch": 0.7723561799886083, "grad_norm": 1.6532581099613222, "learning_rate": 7.932854897449727e-07, "log_odds_chosen": 1.6357421875, "log_odds_ratio": -0.4205566346645355, "logits/chosen": -1.405859351158142, "logits/rejected": -1.1550781726837158, "logps/chosen": -0.6839843988418579, "logps/rejected": -1.919921875, "loss": 0.8076, "nll_loss": 0.7691406011581421, "rewards/accuracies": 0.75, "rewards/chosen": -0.06839599460363388, "rewards/margins": 0.12379150092601776, "rewards/rejected": -0.19204100966453552, "step": 10170 }, { "epoch": 0.7731156255933169, "grad_norm": 1.6086220608597865, "learning_rate": 7.928957645975286e-07, "log_odds_chosen": 1.626562476158142, "log_odds_ratio": -0.4270385801792145, "logits/chosen": -1.2312500476837158, "logits/rejected": -1.043359398841858, "logps/chosen": -0.696484386920929, "logps/rejected": -1.894921898841858, "loss": 0.82, "nll_loss": 0.760937511920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06960449367761612, "rewards/margins": 0.11984863132238388, "rewards/rejected": -0.18947753310203552, "step": 10180 }, { "epoch": 0.7738750711980255, "grad_norm": 2.3930409402613706, "learning_rate": 7.925066132785799e-07, "log_odds_chosen": 1.565039038658142, "log_odds_ratio": -0.395751953125, "logits/chosen": -1.262109398841858, "logits/rejected": -1.1003906726837158, "logps/chosen": -0.7074218988418579, "logps/rejected": -1.853906273841858, "loss": 0.8516, "nll_loss": 0.834179699420929, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.07076416164636612, "rewards/margins": 0.11470947414636612, "rewards/rejected": -0.1854248046875, "step": 10190 }, { "epoch": 0.774634516802734, "grad_norm": 1.9628197703977521, "learning_rate": 7.921180343813395e-07, "log_odds_chosen": 1.4992187023162842, "log_odds_ratio": -0.3987793028354645, "logits/chosen": -1.190820336341858, "logits/rejected": -1.041601538658142, "logps/chosen": -0.677050769329071, "logps/rejected": -1.7626953125, "loss": 0.8323, "nll_loss": 0.7880859375, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06771240383386612, "rewards/margins": 0.10841064155101776, "rewards/rejected": -0.17619629204273224, "step": 10200 }, { "epoch": 0.7753939624074425, "grad_norm": 1.9039973588043215, "learning_rate": 7.917300265038436e-07, "log_odds_chosen": 1.6943359375, "log_odds_ratio": -0.43034666776657104, "logits/chosen": -1.183203101158142, "logits/rejected": -1.1082031726837158, "logps/chosen": -0.7416015863418579, "logps/rejected": -2.056640625, "loss": 0.8333, "nll_loss": 0.7745116949081421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07414551079273224, "rewards/margins": 0.13143309950828552, "rewards/rejected": -0.20563964545726776, "step": 10210 }, { "epoch": 0.7761534080121512, "grad_norm": 2.4032742877153233, "learning_rate": 7.913425882489307e-07, "log_odds_chosen": 1.598046898841858, "log_odds_ratio": -0.36308592557907104, "logits/chosen": -1.276757836341858, "logits/rejected": -1.075781226158142, "logps/chosen": -0.670117199420929, "logps/rejected": -1.798828125, "loss": 0.8314, "nll_loss": 0.757617175579071, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06694336235523224, "rewards/margins": 0.11307372897863388, "rewards/rejected": -0.18000487983226776, "step": 10220 }, { "epoch": 0.7769128536168597, "grad_norm": 2.6256144170537423, "learning_rate": 7.909557182242211e-07, "log_odds_chosen": 1.6171875, "log_odds_ratio": -0.3779296875, "logits/chosen": -1.232421875, "logits/rejected": -1.0685546398162842, "logps/chosen": -0.7372070550918579, "logps/rejected": -1.947656273841858, "loss": 0.8162, "nll_loss": 0.814453125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07363281399011612, "rewards/margins": 0.12122802436351776, "rewards/rejected": -0.19477538764476776, "step": 10230 }, { "epoch": 0.7776722992215682, "grad_norm": 2.503538783163183, "learning_rate": 7.905694150420947e-07, "log_odds_chosen": 1.1460082530975342, "log_odds_ratio": -0.54541015625, "logits/chosen": -1.1242187023162842, "logits/rejected": -1.0048828125, "logps/chosen": -0.8062499761581421, "logps/rejected": -1.6326172351837158, "loss": 0.8488, "nll_loss": 0.8428710699081421, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.08063964545726776, "rewards/margins": 0.08269043266773224, "rewards/rejected": -0.16328124701976776, "step": 10240 }, { "epoch": 0.7784317448262769, "grad_norm": 2.2737179967964334, "learning_rate": 7.901836773196717e-07, "log_odds_chosen": 1.5880858898162842, "log_odds_ratio": -0.4181152284145355, "logits/chosen": -1.293359398841858, "logits/rejected": -1.131250023841858, "logps/chosen": -0.6820312738418579, "logps/rejected": -1.83203125, "loss": 0.8434, "nll_loss": 0.753613293170929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06820068508386612, "rewards/margins": 0.11491699516773224, "rewards/rejected": -0.1832275390625, "step": 10250 }, { "epoch": 0.7791911904309854, "grad_norm": 2.048206488397067, "learning_rate": 7.897985036787898e-07, "log_odds_chosen": 1.56884765625, "log_odds_ratio": -0.38398438692092896, "logits/chosen": -1.2365233898162842, "logits/rejected": -1.0841796398162842, "logps/chosen": -0.6737304925918579, "logps/rejected": -1.8035156726837158, "loss": 0.8224, "nll_loss": 0.796875, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.0673828125, "rewards/margins": 0.11302490532398224, "rewards/rejected": -0.18017578125, "step": 10260 }, { "epoch": 0.7799506360356939, "grad_norm": 1.5923218397110792, "learning_rate": 7.894138927459853e-07, "log_odds_chosen": 1.3406250476837158, "log_odds_ratio": -0.45844727754592896, "logits/chosen": -1.1749999523162842, "logits/rejected": -1.051367163658142, "logps/chosen": -0.7510741949081421, "logps/rejected": -1.740625023841858, "loss": 0.8165, "nll_loss": 0.812207043170929, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.0750732421875, "rewards/margins": 0.09912109375, "rewards/rejected": -0.17416992783546448, "step": 10270 }, { "epoch": 0.7807100816404025, "grad_norm": 2.1394962335624386, "learning_rate": 7.890298431524716e-07, "log_odds_chosen": 1.805078148841858, "log_odds_ratio": -0.38361817598342896, "logits/chosen": -1.162695288658142, "logits/rejected": -1.0146484375, "logps/chosen": -0.702929675579071, "logps/rejected": -2.032031297683716, "loss": 0.82, "nll_loss": 0.814160168170929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07033691555261612, "rewards/margins": 0.13292846083641052, "rewards/rejected": -0.20327147841453552, "step": 10280 }, { "epoch": 0.7814695272451111, "grad_norm": 2.69601568858476, "learning_rate": 7.88646353534119e-07, "log_odds_chosen": 1.5158202648162842, "log_odds_ratio": -0.40043944120407104, "logits/chosen": -1.3332030773162842, "logits/rejected": -1.1687500476837158, "logps/chosen": -0.655078113079071, "logps/rejected": -1.74609375, "loss": 0.8398, "nll_loss": 0.759472668170929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06553955376148224, "rewards/margins": 0.10908813774585724, "rewards/rejected": -0.1746826171875, "step": 10290 }, { "epoch": 0.7822289728498196, "grad_norm": 2.191404784926674, "learning_rate": 7.882634225314345e-07, "log_odds_chosen": 1.553613305091858, "log_odds_ratio": -0.41650390625, "logits/chosen": -1.2648437023162842, "logits/rejected": -1.099023461341858, "logps/chosen": -0.765429675579071, "logps/rejected": -1.939062476158142, "loss": 0.825, "nll_loss": 0.7875000238418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07650146633386612, "rewards/margins": 0.11745300143957138, "rewards/rejected": -0.19384765625, "step": 10300 }, { "epoch": 0.7829884184545282, "grad_norm": 1.9332118348414757, "learning_rate": 7.87881048789541e-07, "log_odds_chosen": 1.439306616783142, "log_odds_ratio": -0.37890625, "logits/chosen": -1.3701171875, "logits/rejected": -1.125, "logps/chosen": -0.634570300579071, "logps/rejected": -1.6251952648162842, "loss": 0.8157, "nll_loss": 0.7919921875, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06348876655101776, "rewards/margins": 0.09905090183019638, "rewards/rejected": -0.16262206435203552, "step": 10310 }, { "epoch": 0.7837478640592368, "grad_norm": 2.42237836240058, "learning_rate": 7.874992309581578e-07, "log_odds_chosen": 1.708398461341858, "log_odds_ratio": -0.3548828065395355, "logits/chosen": -1.237695336341858, "logits/rejected": -0.994335949420929, "logps/chosen": -0.66015625, "logps/rejected": -1.8603515625, "loss": 0.8274, "nll_loss": 0.728808581829071, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.06602783501148224, "rewards/margins": 0.12009887397289276, "rewards/rejected": -0.18618163466453552, "step": 10320 }, { "epoch": 0.7845073096639453, "grad_norm": 1.793762452258263, "learning_rate": 7.871179676915801e-07, "log_odds_chosen": 1.6848633289337158, "log_odds_ratio": -0.381103515625, "logits/chosen": -1.1708984375, "logits/rejected": -1.072656273841858, "logps/chosen": -0.685742199420929, "logps/rejected": -1.931249976158142, "loss": 0.8219, "nll_loss": 0.802539050579071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06853027641773224, "rewards/margins": 0.124664306640625, "rewards/rejected": -0.1932373046875, "step": 10330 }, { "epoch": 0.7852667552686539, "grad_norm": 1.9906624328845575, "learning_rate": 7.867372576486597e-07, "log_odds_chosen": 1.689843773841858, "log_odds_ratio": -0.4129394590854645, "logits/chosen": -1.2531249523162842, "logits/rejected": -1.0595703125, "logps/chosen": -0.7100585699081421, "logps/rejected": -1.933984398841858, "loss": 0.8518, "nll_loss": 0.8416992425918579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07100830227136612, "rewards/margins": 0.12244872748851776, "rewards/rejected": -0.19350585341453552, "step": 10340 }, { "epoch": 0.7860262008733624, "grad_norm": 1.771868028991938, "learning_rate": 7.863570994927847e-07, "log_odds_chosen": 1.5180175304412842, "log_odds_ratio": -0.43505859375, "logits/chosen": -1.221289038658142, "logits/rejected": -1.0447266101837158, "logps/chosen": -0.721875011920929, "logps/rejected": -1.83203125, "loss": 0.8425, "nll_loss": 0.8091796636581421, "rewards/accuracies": 0.75, "rewards/chosen": -0.0721435546875, "rewards/margins": 0.11093749850988388, "rewards/rejected": -0.18312987685203552, "step": 10350 }, { "epoch": 0.786785646478071, "grad_norm": 1.975750595607362, "learning_rate": 7.859774918918594e-07, "log_odds_chosen": 1.6037108898162842, "log_odds_ratio": -0.3805908262729645, "logits/chosen": -1.3318359851837158, "logits/rejected": -1.1154296398162842, "logps/chosen": -0.6693359613418579, "logps/rejected": -1.790624976158142, "loss": 0.8235, "nll_loss": 0.725292980670929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06687011569738388, "rewards/margins": 0.11201171576976776, "rewards/rejected": -0.17885741591453552, "step": 10360 }, { "epoch": 0.7875450920827796, "grad_norm": 2.061641247368863, "learning_rate": 7.855984335182852e-07, "log_odds_chosen": 1.837890625, "log_odds_ratio": -0.36784666776657104, "logits/chosen": -1.263671875, "logits/rejected": -1.0587890148162842, "logps/chosen": -0.669726550579071, "logps/rejected": -2.0111327171325684, "loss": 0.8091, "nll_loss": 0.7197265625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0670166015625, "rewards/margins": 0.13408812880516052, "rewards/rejected": -0.20100097358226776, "step": 10370 }, { "epoch": 0.7883045376874881, "grad_norm": 2.380237379044274, "learning_rate": 7.852199230489422e-07, "log_odds_chosen": 1.752832055091858, "log_odds_ratio": -0.37285155057907104, "logits/chosen": -1.2468750476837158, "logits/rejected": -1.05859375, "logps/chosen": -0.67431640625, "logps/rejected": -1.9552733898162842, "loss": 0.8379, "nll_loss": 0.792187511920929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06741943210363388, "rewards/margins": 0.127960205078125, "rewards/rejected": -0.19528809189796448, "step": 10380 }, { "epoch": 0.7890639832921967, "grad_norm": 1.872928030134007, "learning_rate": 7.848419591651668e-07, "log_odds_chosen": 1.37890625, "log_odds_ratio": -0.4627929627895355, "logits/chosen": -1.2921874523162842, "logits/rejected": -1.111328125, "logps/chosen": -0.73388671875, "logps/rejected": -1.7507812976837158, "loss": 0.8473, "nll_loss": 0.804492175579071, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07337646186351776, "rewards/margins": 0.10158081352710724, "rewards/rejected": -0.17490234971046448, "step": 10390 }, { "epoch": 0.7898234288969053, "grad_norm": 1.8359909982859695, "learning_rate": 7.844645405527361e-07, "log_odds_chosen": 1.338134765625, "log_odds_ratio": -0.4302734434604645, "logits/chosen": -1.166015625, "logits/rejected": -1.0294921398162842, "logps/chosen": -0.762011706829071, "logps/rejected": -1.692968726158142, "loss": 0.8442, "nll_loss": 0.822265625, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07612304389476776, "rewards/margins": 0.09316253662109375, "rewards/rejected": -0.16923828423023224, "step": 10400 }, { "epoch": 0.7905828745016138, "grad_norm": 1.9834233099401095, "learning_rate": 7.840876659018457e-07, "log_odds_chosen": 1.3353271484375, "log_odds_ratio": -0.4327148497104645, "logits/chosen": -1.26171875, "logits/rejected": -1.0427734851837158, "logps/chosen": -0.7060546875, "logps/rejected": -1.6423828601837158, "loss": 0.8258, "nll_loss": 0.774609386920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.0706787109375, "rewards/margins": 0.09351806342601776, "rewards/rejected": -0.16420897841453552, "step": 10410 }, { "epoch": 0.7913423201063224, "grad_norm": 2.2562192190076518, "learning_rate": 7.837113339070922e-07, "log_odds_chosen": 1.789453148841858, "log_odds_ratio": -0.41191405057907104, "logits/chosen": -1.2421875, "logits/rejected": -1.08984375, "logps/chosen": -0.742871105670929, "logps/rejected": -2.1285157203674316, "loss": 0.8362, "nll_loss": 0.825488269329071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07429198920726776, "rewards/margins": 0.13848876953125, "rewards/rejected": -0.21284179389476776, "step": 10420 }, { "epoch": 0.792101765711031, "grad_norm": 1.9448676193410612, "learning_rate": 7.833355432674538e-07, "log_odds_chosen": 1.745507836341858, "log_odds_ratio": -0.3860839903354645, "logits/chosen": -1.2507812976837158, "logits/rejected": -1.0576171875, "logps/chosen": -0.780078113079071, "logps/rejected": -2.090039014816284, "loss": 0.8305, "nll_loss": 0.869140625, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07807616889476776, "rewards/margins": 0.13088683784008026, "rewards/rejected": -0.20895996689796448, "step": 10430 }, { "epoch": 0.7928612113157395, "grad_norm": 2.1809824932686612, "learning_rate": 7.829602926862713e-07, "log_odds_chosen": 1.620507836341858, "log_odds_ratio": -0.37578123807907104, "logits/chosen": -1.310546875, "logits/rejected": -1.109375, "logps/chosen": -0.656445324420929, "logps/rejected": -1.804101586341858, "loss": 0.8161, "nll_loss": 0.749707043170929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06569824367761612, "rewards/margins": 0.11485596001148224, "rewards/rejected": -0.1805419921875, "step": 10440 }, { "epoch": 0.793620656920448, "grad_norm": 2.2857189137916993, "learning_rate": 7.825855808712296e-07, "log_odds_chosen": 1.6447632312774658, "log_odds_ratio": -0.39912110567092896, "logits/chosen": -1.2900390625, "logits/rejected": -1.0724608898162842, "logps/chosen": -0.7144531011581421, "logps/rejected": -1.944726586341858, "loss": 0.7927, "nll_loss": 0.7818359136581421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.0714111328125, "rewards/margins": 0.123138427734375, "rewards/rejected": -0.19472655653953552, "step": 10450 }, { "epoch": 0.7943801025251567, "grad_norm": 1.9465759671926481, "learning_rate": 7.822114065343386e-07, "log_odds_chosen": 1.544531226158142, "log_odds_ratio": -0.38676756620407104, "logits/chosen": -1.333593726158142, "logits/rejected": -1.1689453125, "logps/chosen": -0.7105468511581421, "logps/rejected": -1.7859375476837158, "loss": 0.822, "nll_loss": 0.7481445074081421, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07106933742761612, "rewards/margins": 0.10765381157398224, "rewards/rejected": -0.17873534560203552, "step": 10460 }, { "epoch": 0.7951395481298652, "grad_norm": 1.847677066683706, "learning_rate": 7.818377683919149e-07, "log_odds_chosen": 1.5048828125, "log_odds_ratio": -0.400146484375, "logits/chosen": -1.185937523841858, "logits/rejected": -1.001367211341858, "logps/chosen": -0.680371105670929, "logps/rejected": -1.742773413658142, "loss": 0.7993, "nll_loss": 0.7802734375, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06800536811351776, "rewards/margins": 0.10605468600988388, "rewards/rejected": -0.174072265625, "step": 10470 }, { "epoch": 0.7958989937345737, "grad_norm": 2.1027696887832716, "learning_rate": 7.814646651645635e-07, "log_odds_chosen": 1.6904296875, "log_odds_ratio": -0.3837646543979645, "logits/chosen": -1.342187523841858, "logits/rejected": -1.0841796398162842, "logps/chosen": -0.7093750238418579, "logps/rejected": -1.992578148841858, "loss": 0.822, "nll_loss": 0.802539050579071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07095947116613388, "rewards/margins": 0.12825927138328552, "rewards/rejected": -0.19907227158546448, "step": 10480 }, { "epoch": 0.7966584393392824, "grad_norm": 2.89104218121431, "learning_rate": 7.810920955771586e-07, "log_odds_chosen": 1.53271484375, "log_odds_ratio": -0.4212402403354645, "logits/chosen": -1.255468726158142, "logits/rejected": -1.0880858898162842, "logps/chosen": -0.693554699420929, "logps/rejected": -1.819921851158142, "loss": 0.8295, "nll_loss": 0.875195324420929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06931152194738388, "rewards/margins": 0.11258240044116974, "rewards/rejected": -0.181884765625, "step": 10490 }, { "epoch": 0.7974178849439909, "grad_norm": 1.8677128000732595, "learning_rate": 7.807200583588266e-07, "log_odds_chosen": 1.7722656726837158, "log_odds_ratio": -0.3912353515625, "logits/chosen": -1.232031226158142, "logits/rejected": -1.054101586341858, "logps/chosen": -0.711718738079071, "logps/rejected": -2.0023436546325684, "loss": 0.8357, "nll_loss": 0.7699218988418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07114257663488388, "rewards/margins": 0.12906494736671448, "rewards/rejected": -0.2001953125, "step": 10500 }, { "epoch": 0.7981773305486994, "grad_norm": 1.9144386923323709, "learning_rate": 7.803485522429261e-07, "log_odds_chosen": 1.681640625, "log_odds_ratio": -0.39677733182907104, "logits/chosen": -1.212304711341858, "logits/rejected": -1.125585913658142, "logps/chosen": -0.665820300579071, "logps/rejected": -1.907812476158142, "loss": 0.8092, "nll_loss": 0.7520507574081421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06658935546875, "rewards/margins": 0.12430419772863388, "rewards/rejected": -0.19072265923023224, "step": 10510 }, { "epoch": 0.798936776153408, "grad_norm": 1.5894734400551145, "learning_rate": 7.799775759670318e-07, "log_odds_chosen": 1.7138671875, "log_odds_ratio": -0.39423829317092896, "logits/chosen": -1.2160155773162842, "logits/rejected": -1.057031273841858, "logps/chosen": -0.731249988079071, "logps/rejected": -2.0316405296325684, "loss": 0.8192, "nll_loss": 0.8042968511581421, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07310791313648224, "rewards/margins": 0.12984618544578552, "rewards/rejected": -0.2030029296875, "step": 10520 }, { "epoch": 0.7996962217581166, "grad_norm": 1.935720627419702, "learning_rate": 7.796071282729149e-07, "log_odds_chosen": 1.7361328601837158, "log_odds_ratio": -0.37480467557907104, "logits/chosen": -1.281835913658142, "logits/rejected": -1.1365234851837158, "logps/chosen": -0.6617187261581421, "logps/rejected": -1.9308593273162842, "loss": 0.8488, "nll_loss": 0.8036133050918579, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06627197563648224, "rewards/margins": 0.12697753310203552, "rewards/rejected": -0.19318847358226776, "step": 10530 }, { "epoch": 0.8004556673628251, "grad_norm": 2.208444278087247, "learning_rate": 7.792372079065259e-07, "log_odds_chosen": 1.718164086341858, "log_odds_ratio": -0.3453613221645355, "logits/chosen": -1.1689453125, "logits/rejected": -0.9800781011581421, "logps/chosen": -0.717480480670929, "logps/rejected": -1.94921875, "loss": 0.8462, "nll_loss": 0.8076171875, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.07176513969898224, "rewards/margins": 0.12319336086511612, "rewards/rejected": -0.19487304985523224, "step": 10540 }, { "epoch": 0.8012151129675337, "grad_norm": 2.5106676889972994, "learning_rate": 7.788678136179767e-07, "log_odds_chosen": 1.494140625, "log_odds_ratio": -0.397705078125, "logits/chosen": -1.298242211341858, "logits/rejected": -1.135351538658142, "logps/chosen": -0.6602538824081421, "logps/rejected": -1.701171875, "loss": 0.8049, "nll_loss": 0.728710949420929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06600341945886612, "rewards/margins": 0.10419921576976776, "rewards/rejected": -0.17014160752296448, "step": 10550 }, { "epoch": 0.8019745585722423, "grad_norm": 1.6643149823507335, "learning_rate": 7.78498944161523e-07, "log_odds_chosen": 1.372314453125, "log_odds_ratio": -0.4178222715854645, "logits/chosen": -1.3388671875, "logits/rejected": -1.1572265625, "logps/chosen": -0.6729491949081421, "logps/rejected": -1.6267578601837158, "loss": 0.8198, "nll_loss": 0.8199218511581421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.0672607421875, "rewards/margins": 0.09538574516773224, "rewards/rejected": -0.16262206435203552, "step": 10560 }, { "epoch": 0.8027340041769508, "grad_norm": 1.9774086281039132, "learning_rate": 7.781305982955459e-07, "log_odds_chosen": 1.495507836341858, "log_odds_ratio": -0.37202149629592896, "logits/chosen": -1.310937523841858, "logits/rejected": -1.091210961341858, "logps/chosen": -0.6629883050918579, "logps/rejected": -1.6945312023162842, "loss": 0.8081, "nll_loss": 0.696972668170929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06633301079273224, "rewards/margins": 0.10322265326976776, "rewards/rejected": -0.16945800185203552, "step": 10570 }, { "epoch": 0.8034934497816594, "grad_norm": 1.9013586835862928, "learning_rate": 7.777627747825355e-07, "log_odds_chosen": 1.478124976158142, "log_odds_ratio": -0.419921875, "logits/chosen": -1.3212890625, "logits/rejected": -1.1164062023162842, "logps/chosen": -0.704882800579071, "logps/rejected": -1.790624976158142, "loss": 0.822, "nll_loss": 0.822070300579071, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07049560546875, "rewards/margins": 0.10870361328125, "rewards/rejected": -0.17912597954273224, "step": 10580 }, { "epoch": 0.8042528953863679, "grad_norm": 1.635759222740883, "learning_rate": 7.773954723890725e-07, "log_odds_chosen": 1.263769507408142, "log_odds_ratio": -0.4552246034145355, "logits/chosen": -1.2158203125, "logits/rejected": -1.0011718273162842, "logps/chosen": -0.7210937738418579, "logps/rejected": -1.652734398841858, "loss": 0.8102, "nll_loss": 0.8343750238418579, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07208251953125, "rewards/margins": 0.09312744438648224, "rewards/rejected": -0.16538086533546448, "step": 10590 }, { "epoch": 0.8050123409910765, "grad_norm": 1.9510152114673742, "learning_rate": 7.770286898858113e-07, "log_odds_chosen": 1.5107421875, "log_odds_ratio": -0.374755859375, "logits/chosen": -1.2458984851837158, "logits/rejected": -1.071679711341858, "logps/chosen": -0.6376953125, "logps/rejected": -1.6611328125, "loss": 0.7884, "nll_loss": 0.739550769329071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06379394233226776, "rewards/margins": 0.10227050632238388, "rewards/rejected": -0.16618652641773224, "step": 10600 }, { "epoch": 0.8057717865957851, "grad_norm": 2.5235346168196062, "learning_rate": 7.766624260474625e-07, "log_odds_chosen": 1.72265625, "log_odds_ratio": -0.37348634004592896, "logits/chosen": -1.20703125, "logits/rejected": -1.021875023841858, "logps/chosen": -0.693652331829071, "logps/rejected": -1.9343750476837158, "loss": 0.8179, "nll_loss": 0.7789062261581421, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.06940917670726776, "rewards/margins": 0.12397460639476776, "rewards/rejected": -0.1934814453125, "step": 10610 }, { "epoch": 0.8065312322004936, "grad_norm": 2.4963652712361424, "learning_rate": 7.762966796527759e-07, "log_odds_chosen": 1.749414086341858, "log_odds_ratio": -0.35371094942092896, "logits/chosen": -1.259179711341858, "logits/rejected": -1.064062476158142, "logps/chosen": -0.649218738079071, "logps/rejected": -1.9089844226837158, "loss": 0.8174, "nll_loss": 0.7134765386581421, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06489257514476776, "rewards/margins": 0.12598876655101776, "rewards/rejected": -0.19091796875, "step": 10620 }, { "epoch": 0.8072906778052023, "grad_norm": 2.2901573486165763, "learning_rate": 7.759314494845234e-07, "log_odds_chosen": 1.461035132408142, "log_odds_ratio": -0.38569337129592896, "logits/chosen": -1.2527344226837158, "logits/rejected": -1.043359398841858, "logps/chosen": -0.753710925579071, "logps/rejected": -1.8015625476837158, "loss": 0.8019, "nll_loss": 0.7818359136581421, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.07535400241613388, "rewards/margins": 0.10472412407398224, "rewards/rejected": -0.18020018935203552, "step": 10630 }, { "epoch": 0.8080501234099108, "grad_norm": 2.23664836758176, "learning_rate": 7.755667343294812e-07, "log_odds_chosen": 1.7644531726837158, "log_odds_ratio": -0.36767578125, "logits/chosen": -1.2781250476837158, "logits/rejected": -1.0654296875, "logps/chosen": -0.6962890625, "logps/rejected": -2.0, "loss": 0.7889, "nll_loss": 0.7188476324081421, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06966552883386612, "rewards/margins": 0.130340576171875, "rewards/rejected": -0.19992676377296448, "step": 10640 }, { "epoch": 0.8088095690146193, "grad_norm": 2.062899603235853, "learning_rate": 7.752025329784146e-07, "log_odds_chosen": 1.3190429210662842, "log_odds_ratio": -0.4269042909145355, "logits/chosen": -1.2156250476837158, "logits/rejected": -1.0720703601837158, "logps/chosen": -0.6783202886581421, "logps/rejected": -1.6300780773162842, "loss": 0.8357, "nll_loss": 0.8167968988418579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06784667819738388, "rewards/margins": 0.09515380859375, "rewards/rejected": -0.16289062798023224, "step": 10650 }, { "epoch": 0.8095690146193278, "grad_norm": 1.964950345534349, "learning_rate": 7.748388442260596e-07, "log_odds_chosen": 1.6256835460662842, "log_odds_ratio": -0.38671875, "logits/chosen": -1.1982421875, "logits/rejected": -1.073828101158142, "logps/chosen": -0.68115234375, "logps/rejected": -1.896875023841858, "loss": 0.8178, "nll_loss": 0.7313476800918579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06806640326976776, "rewards/margins": 0.12169799953699112, "rewards/rejected": -0.189697265625, "step": 10660 }, { "epoch": 0.8103284602240365, "grad_norm": 2.6358155729810546, "learning_rate": 7.744756668711065e-07, "log_odds_chosen": 1.4111328125, "log_odds_ratio": -0.443603515625, "logits/chosen": -1.2179687023162842, "logits/rejected": -1.083984375, "logps/chosen": -0.7369140386581421, "logps/rejected": -1.744531273841858, "loss": 0.8358, "nll_loss": 0.8509765863418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07366943359375, "rewards/margins": 0.10068969428539276, "rewards/rejected": -0.17436523735523224, "step": 10670 }, { "epoch": 0.811087905828745, "grad_norm": 1.760869051853748, "learning_rate": 7.741129997161835e-07, "log_odds_chosen": 1.6129882335662842, "log_odds_ratio": -0.4383789002895355, "logits/chosen": -1.277929663658142, "logits/rejected": -1.060156226158142, "logps/chosen": -0.7364257574081421, "logps/rejected": -1.9792969226837158, "loss": 0.8065, "nll_loss": 0.78857421875, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07373046875, "rewards/margins": 0.12413330376148224, "rewards/rejected": -0.19780273735523224, "step": 10680 }, { "epoch": 0.8118473514334535, "grad_norm": 2.4981069007084153, "learning_rate": 7.737508415678403e-07, "log_odds_chosen": 1.7058594226837158, "log_odds_ratio": -0.38176268339157104, "logits/chosen": -1.3615233898162842, "logits/rejected": -1.1505858898162842, "logps/chosen": -0.699902355670929, "logps/rejected": -1.933984398841858, "loss": 0.8031, "nll_loss": 0.800097644329071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07000732421875, "rewards/margins": 0.12347412109375, "rewards/rejected": -0.193359375, "step": 10690 }, { "epoch": 0.8126067970381622, "grad_norm": 2.159430092739579, "learning_rate": 7.733891912365308e-07, "log_odds_chosen": 1.435156226158142, "log_odds_ratio": -0.3873046934604645, "logits/chosen": -1.328125, "logits/rejected": -1.1279296875, "logps/chosen": -0.6664062738418579, "logps/rejected": -1.66796875, "loss": 0.8246, "nll_loss": 0.768359363079071, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06655273586511612, "rewards/margins": 0.10013427585363388, "rewards/rejected": -0.16679687798023224, "step": 10700 }, { "epoch": 0.8133662426428707, "grad_norm": 1.8646633838019677, "learning_rate": 7.730280475365979e-07, "log_odds_chosen": 1.4558594226837158, "log_odds_ratio": -0.415283203125, "logits/chosen": -1.292578101158142, "logits/rejected": -1.130273461341858, "logps/chosen": -0.738476574420929, "logps/rejected": -1.7726562023162842, "loss": 0.8098, "nll_loss": 0.7733398675918579, "rewards/accuracies": 0.75, "rewards/chosen": -0.07382812350988388, "rewards/margins": 0.1033935546875, "rewards/rejected": -0.17729492485523224, "step": 10710 }, { "epoch": 0.8141256882475792, "grad_norm": 2.0937880043208215, "learning_rate": 7.726674092862557e-07, "log_odds_chosen": 1.589599609375, "log_odds_ratio": -0.425537109375, "logits/chosen": -1.267187476158142, "logits/rejected": -1.1222655773162842, "logps/chosen": -0.6761718988418579, "logps/rejected": -1.823828101158142, "loss": 0.797, "nll_loss": 0.7744140625, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06765136867761612, "rewards/margins": 0.11472778022289276, "rewards/rejected": -0.18234863877296448, "step": 10720 }, { "epoch": 0.8148851338522878, "grad_norm": 1.7840128173712262, "learning_rate": 7.723072753075748e-07, "log_odds_chosen": 1.605078101158142, "log_odds_ratio": -0.36967772245407104, "logits/chosen": -1.355859398841858, "logits/rejected": -1.1789062023162842, "logps/chosen": -0.666796863079071, "logps/rejected": -1.800390601158142, "loss": 0.8377, "nll_loss": 0.76025390625, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06668701022863388, "rewards/margins": 0.11361084133386612, "rewards/rejected": -0.18020018935203552, "step": 10730 }, { "epoch": 0.8156445794569964, "grad_norm": 2.3834529700180895, "learning_rate": 7.719476444264649e-07, "log_odds_chosen": 1.6256835460662842, "log_odds_ratio": -0.3705078065395355, "logits/chosen": -1.2501952648162842, "logits/rejected": -1.118749976158142, "logps/chosen": -0.723925769329071, "logps/rejected": -1.9416015148162842, "loss": 0.8147, "nll_loss": 0.7466796636581421, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07236327975988388, "rewards/margins": 0.12178955227136612, "rewards/rejected": -0.19414062798023224, "step": 10740 }, { "epoch": 0.816404025061705, "grad_norm": 2.2989690649655508, "learning_rate": 7.715885154726593e-07, "log_odds_chosen": 1.5558593273162842, "log_odds_ratio": -0.41997069120407104, "logits/chosen": -1.253515601158142, "logits/rejected": -1.0978515148162842, "logps/chosen": -0.6822265386581421, "logps/rejected": -1.814843773841858, "loss": 0.8306, "nll_loss": 0.8408203125, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06815185397863388, "rewards/margins": 0.11319275200366974, "rewards/rejected": -0.1815185546875, "step": 10750 }, { "epoch": 0.8171634706664135, "grad_norm": 1.959009278262218, "learning_rate": 7.71229887279699e-07, "log_odds_chosen": 1.5510742664337158, "log_odds_ratio": -0.402587890625, "logits/chosen": -1.2041015625, "logits/rejected": -1.0324218273162842, "logps/chosen": -0.6532226800918579, "logps/rejected": -1.7335937023162842, "loss": 0.8338, "nll_loss": 0.78173828125, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06534423679113388, "rewards/margins": 0.10796508938074112, "rewards/rejected": -0.17331543564796448, "step": 10760 }, { "epoch": 0.8179229162711221, "grad_norm": 2.0885374811301847, "learning_rate": 7.708717586849164e-07, "log_odds_chosen": 1.409277319908142, "log_odds_ratio": -0.39433592557907104, "logits/chosen": -1.3048827648162842, "logits/rejected": -1.1335937976837158, "logps/chosen": -0.694140613079071, "logps/rejected": -1.684960961341858, "loss": 0.8338, "nll_loss": 0.76318359375, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06940917670726776, "rewards/margins": 0.09907226264476776, "rewards/rejected": -0.1685791015625, "step": 10770 }, { "epoch": 0.8186823618758307, "grad_norm": 2.312827478707372, "learning_rate": 7.705141285294196e-07, "log_odds_chosen": 1.50390625, "log_odds_ratio": -0.4205078184604645, "logits/chosen": -1.3263671398162842, "logits/rejected": -1.1453125476837158, "logps/chosen": -0.702929675579071, "logps/rejected": -1.826562523841858, "loss": 0.8182, "nll_loss": 0.830859363079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07028808444738388, "rewards/margins": 0.1124267578125, "rewards/rejected": -0.18254394829273224, "step": 10780 }, { "epoch": 0.8194418074805392, "grad_norm": 1.8531635133081321, "learning_rate": 7.701569956580767e-07, "log_odds_chosen": 1.559545874595642, "log_odds_ratio": -0.40776365995407104, "logits/chosen": -1.312109351158142, "logits/rejected": -1.1339843273162842, "logps/chosen": -0.6805664300918579, "logps/rejected": -1.795312523841858, "loss": 0.8407, "nll_loss": 0.7994140386581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.068115234375, "rewards/margins": 0.11136779934167862, "rewards/rejected": -0.17958983778953552, "step": 10790 }, { "epoch": 0.8202012530852477, "grad_norm": 1.8970354254240354, "learning_rate": 7.69800358919501e-07, "log_odds_chosen": 1.337646484375, "log_odds_ratio": -0.45068359375, "logits/chosen": -1.296875, "logits/rejected": -1.1447265148162842, "logps/chosen": -0.681933581829071, "logps/rejected": -1.6345703601837158, "loss": 0.832, "nll_loss": 0.8089843988418579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06815185397863388, "rewards/margins": 0.09517822414636612, "rewards/rejected": -0.1634521484375, "step": 10800 }, { "epoch": 0.8209606986899564, "grad_norm": 1.8112357714731788, "learning_rate": 7.694442171660333e-07, "log_odds_chosen": 1.627343773841858, "log_odds_ratio": -0.36372071504592896, "logits/chosen": -1.3330078125, "logits/rejected": -1.1242187023162842, "logps/chosen": -0.6143554449081421, "logps/rejected": -1.7462890148162842, "loss": 0.7628, "nll_loss": 0.684277355670929, "rewards/accuracies": 0.84375, "rewards/chosen": -0.0614013671875, "rewards/margins": 0.11328125, "rewards/rejected": -0.17463378608226776, "step": 10810 }, { "epoch": 0.8217201442946649, "grad_norm": 1.8196595961512592, "learning_rate": 7.690885692537282e-07, "log_odds_chosen": 1.482031226158142, "log_odds_ratio": -0.34882813692092896, "logits/chosen": -1.245703101158142, "logits/rejected": -1.0333983898162842, "logps/chosen": -0.7392578125, "logps/rejected": -1.7755858898162842, "loss": 0.8165, "nll_loss": 0.7979491949081421, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.07398681342601776, "rewards/margins": 0.10355224460363388, "rewards/rejected": -0.17756347358226776, "step": 10820 }, { "epoch": 0.8224795898993734, "grad_norm": 2.5092282628883815, "learning_rate": 7.687334140423383e-07, "log_odds_chosen": 1.6103515625, "log_odds_ratio": -0.40644532442092896, "logits/chosen": -1.3849608898162842, "logits/rejected": -1.197265625, "logps/chosen": -0.7168945074081421, "logps/rejected": -1.9142577648162842, "loss": 0.8045, "nll_loss": 0.716015636920929, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07169189304113388, "rewards/margins": 0.11986084282398224, "rewards/rejected": -0.19140625, "step": 10830 }, { "epoch": 0.8232390355040821, "grad_norm": 4.269745727764095, "learning_rate": 7.683787503952985e-07, "log_odds_chosen": 1.372802734375, "log_odds_ratio": -0.43413084745407104, "logits/chosen": -1.328125, "logits/rejected": -1.1203124523162842, "logps/chosen": -0.701171875, "logps/rejected": -1.6765625476837158, "loss": 0.8298, "nll_loss": 0.7953125238418579, "rewards/accuracies": 0.75, "rewards/chosen": -0.070068359375, "rewards/margins": 0.09747314453125, "rewards/rejected": -0.16745606064796448, "step": 10840 }, { "epoch": 0.8239984811087906, "grad_norm": 2.5529910928507635, "learning_rate": 7.680245771797108e-07, "log_odds_chosen": 1.866796851158142, "log_odds_ratio": -0.3421874940395355, "logits/chosen": -1.3259766101837158, "logits/rejected": -1.099023461341858, "logps/chosen": -0.664355456829071, "logps/rejected": -2.032031297683716, "loss": 0.8233, "nll_loss": 0.7744140625, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.06644286960363388, "rewards/margins": 0.13673095405101776, "rewards/rejected": -0.20327147841453552, "step": 10850 }, { "epoch": 0.8247579267134991, "grad_norm": 1.844501596241338, "learning_rate": 7.676708932663293e-07, "log_odds_chosen": 1.5615234375, "log_odds_ratio": -0.41120606660842896, "logits/chosen": -1.222070336341858, "logits/rejected": -1.057226538658142, "logps/chosen": -0.692089855670929, "logps/rejected": -1.828515648841858, "loss": 0.8042, "nll_loss": 0.7835937738418579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.06916503608226776, "rewards/margins": 0.11370849609375, "rewards/rejected": -0.18281249701976776, "step": 10860 }, { "epoch": 0.8255173723182077, "grad_norm": 1.643029420969285, "learning_rate": 7.67317697529545e-07, "log_odds_chosen": 1.706445336341858, "log_odds_ratio": -0.3582519590854645, "logits/chosen": -1.223242163658142, "logits/rejected": -1.041406273841858, "logps/chosen": -0.6705077886581421, "logps/rejected": -1.906640648841858, "loss": 0.8518, "nll_loss": 0.7708984613418579, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0670166015625, "rewards/margins": 0.12369384616613388, "rewards/rejected": -0.19050292670726776, "step": 10870 }, { "epoch": 0.8262768179229163, "grad_norm": 2.818124042014355, "learning_rate": 7.669649888473704e-07, "log_odds_chosen": 1.981835961341858, "log_odds_ratio": -0.327392578125, "logits/chosen": -1.344140648841858, "logits/rejected": -1.1140625476837158, "logps/chosen": -0.6543945074081421, "logps/rejected": -2.141796827316284, "loss": 0.8122, "nll_loss": 0.685253918170929, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.06549072265625, "rewards/margins": 0.14865723252296448, "rewards/rejected": -0.21413573622703552, "step": 10880 }, { "epoch": 0.8270362635276248, "grad_norm": 2.0194179278819226, "learning_rate": 7.666127661014253e-07, "log_odds_chosen": 1.54736328125, "log_odds_ratio": -0.3995605409145355, "logits/chosen": -1.255273461341858, "logits/rejected": -1.1082031726837158, "logps/chosen": -0.704394519329071, "logps/rejected": -1.812890648841858, "loss": 0.8319, "nll_loss": 0.817675769329071, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07047118991613388, "rewards/margins": 0.11090087890625, "rewards/rejected": -0.181396484375, "step": 10890 }, { "epoch": 0.8277957091323334, "grad_norm": 2.686839012928601, "learning_rate": 7.662610281769211e-07, "log_odds_chosen": 1.5714843273162842, "log_odds_ratio": -0.3800292909145355, "logits/chosen": -1.22265625, "logits/rejected": -1.0763671398162842, "logps/chosen": -0.699414074420929, "logps/rejected": -1.835546851158142, "loss": 0.8122, "nll_loss": 0.716601550579071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06993408501148224, "rewards/margins": 0.11342773586511612, "rewards/rejected": -0.18344727158546448, "step": 10900 }, { "epoch": 0.828555154737042, "grad_norm": 1.9525949581125608, "learning_rate": 7.659097739626465e-07, "log_odds_chosen": 1.4021484851837158, "log_odds_ratio": -0.4317382872104645, "logits/chosen": -1.3562500476837158, "logits/rejected": -1.1437499523162842, "logps/chosen": -0.6958984136581421, "logps/rejected": -1.721289038658142, "loss": 0.7953, "nll_loss": 0.7841796875, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06959228217601776, "rewards/margins": 0.10255737602710724, "rewards/rejected": -0.17209473252296448, "step": 10910 }, { "epoch": 0.8293146003417505, "grad_norm": 2.244421193235883, "learning_rate": 7.655590023509527e-07, "log_odds_chosen": 1.4833984375, "log_odds_ratio": -0.43916016817092896, "logits/chosen": -1.236718773841858, "logits/rejected": -1.0958983898162842, "logps/chosen": -0.7001953125, "logps/rejected": -1.791015625, "loss": 0.8297, "nll_loss": 0.8084961175918579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07009277492761612, "rewards/margins": 0.109130859375, "rewards/rejected": -0.17915038764476776, "step": 10920 }, { "epoch": 0.8300740459464591, "grad_norm": 1.5782492580666134, "learning_rate": 7.652087122377384e-07, "log_odds_chosen": 1.5190918445587158, "log_odds_ratio": -0.4065918028354645, "logits/chosen": -1.2685546875, "logits/rejected": -1.119531273841858, "logps/chosen": -0.6859375238418579, "logps/rejected": -1.7843749523162842, "loss": 0.7887, "nll_loss": 0.725390613079071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06861571967601776, "rewards/margins": 0.1100616455078125, "rewards/rejected": -0.17861327528953552, "step": 10930 }, { "epoch": 0.8308334915511677, "grad_norm": 9.764566043394517, "learning_rate": 7.648589025224355e-07, "log_odds_chosen": 1.4408690929412842, "log_odds_ratio": -0.40458983182907104, "logits/chosen": -1.17578125, "logits/rejected": -1.0349609851837158, "logps/chosen": -0.7237304449081421, "logps/rejected": -1.75390625, "loss": 0.8382, "nll_loss": 0.8666015863418579, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07235107570886612, "rewards/margins": 0.10300598293542862, "rewards/rejected": -0.17543944716453552, "step": 10940 }, { "epoch": 0.8315929371558762, "grad_norm": 1.7794190559851466, "learning_rate": 7.645095721079945e-07, "log_odds_chosen": 1.4606444835662842, "log_odds_ratio": -0.43598634004592896, "logits/chosen": -1.3234374523162842, "logits/rejected": -1.092382788658142, "logps/chosen": -0.6996093988418579, "logps/rejected": -1.7880859375, "loss": 0.8434, "nll_loss": 0.741992175579071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07000732421875, "rewards/margins": 0.10894165188074112, "rewards/rejected": -0.17890624701976776, "step": 10950 }, { "epoch": 0.8323523827605848, "grad_norm": 2.039458709885709, "learning_rate": 7.641607199008701e-07, "log_odds_chosen": 1.770898461341858, "log_odds_ratio": -0.35655516386032104, "logits/chosen": -1.4119141101837158, "logits/rejected": -1.2185547351837158, "logps/chosen": -0.6435546875, "logps/rejected": -1.912695288658142, "loss": 0.8259, "nll_loss": 0.7655273675918579, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06439208984375, "rewards/margins": 0.12700195610523224, "rewards/rejected": -0.1912841796875, "step": 10960 }, { "epoch": 0.8331118283652933, "grad_norm": 2.0095558068168624, "learning_rate": 7.638123448110066e-07, "log_odds_chosen": 1.7041015625, "log_odds_ratio": -0.3779296875, "logits/chosen": -1.304101586341858, "logits/rejected": -1.140039086341858, "logps/chosen": -0.6766601800918579, "logps/rejected": -1.88671875, "loss": 0.8085, "nll_loss": 0.748242199420929, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06766357272863388, "rewards/margins": 0.120849609375, "rewards/rejected": -0.18852539360523224, "step": 10970 }, { "epoch": 0.8338712739700019, "grad_norm": 1.9843242183544112, "learning_rate": 7.634644457518242e-07, "log_odds_chosen": 1.6984374523162842, "log_odds_ratio": -0.34467774629592896, "logits/chosen": -1.352148413658142, "logits/rejected": -1.1208984851837158, "logps/chosen": -0.662792980670929, "logps/rejected": -1.853515625, "loss": 0.8372, "nll_loss": 0.7793945074081421, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06623534858226776, "rewards/margins": 0.119140625, "rewards/rejected": -0.18535156548023224, "step": 10980 }, { "epoch": 0.8346307195747105, "grad_norm": 2.005129384233812, "learning_rate": 7.631170216402039e-07, "log_odds_chosen": 1.4904296398162842, "log_odds_ratio": -0.36455076932907104, "logits/chosen": -1.266015648841858, "logits/rejected": -1.0763671398162842, "logps/chosen": -0.659960925579071, "logps/rejected": -1.728906273841858, "loss": 0.822, "nll_loss": 0.798144519329071, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.06591796875, "rewards/margins": 0.10688476264476776, "rewards/rejected": -0.17292480170726776, "step": 10990 }, { "epoch": 0.835390165179419, "grad_norm": 2.1427757003517054, "learning_rate": 7.627700713964739e-07, "log_odds_chosen": 1.841210961341858, "log_odds_ratio": -0.35432130098342896, "logits/chosen": -1.234375, "logits/rejected": -1.029687523841858, "logps/chosen": -0.6708984375, "logps/rejected": -2.048632860183716, "loss": 0.8299, "nll_loss": 0.7640625238418579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06702880561351776, "rewards/margins": 0.13785400986671448, "rewards/rejected": -0.20478515326976776, "step": 11000 }, { "epoch": 0.8361496107841276, "grad_norm": 1.9610937933907204, "learning_rate": 7.624235939443953e-07, "log_odds_chosen": 1.3707275390625, "log_odds_ratio": -0.43115234375, "logits/chosen": -1.239648461341858, "logits/rejected": -1.177734375, "logps/chosen": -0.7173827886581421, "logps/rejected": -1.6882812976837158, "loss": 0.8154, "nll_loss": 0.7720702886581421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07172851264476776, "rewards/margins": 0.09705810248851776, "rewards/rejected": -0.16877441108226776, "step": 11010 }, { "epoch": 0.8369090563888362, "grad_norm": 2.6461291108806178, "learning_rate": 7.620775882111482e-07, "log_odds_chosen": 1.50927734375, "log_odds_ratio": -0.3717285096645355, "logits/chosen": -1.322656273841858, "logits/rejected": -1.1328125, "logps/chosen": -0.6646484136581421, "logps/rejected": -1.7195312976837158, "loss": 0.8192, "nll_loss": 0.713183581829071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06646728515625, "rewards/margins": 0.10549316555261612, "rewards/rejected": -0.17207030951976776, "step": 11020 }, { "epoch": 0.8376685019935447, "grad_norm": 1.8406991670621995, "learning_rate": 7.617320531273181e-07, "log_odds_chosen": 1.497778296470642, "log_odds_ratio": -0.432373046875, "logits/chosen": -1.253515601158142, "logits/rejected": -1.0947265625, "logps/chosen": -0.66162109375, "logps/rejected": -1.7361328601837158, "loss": 0.8224, "nll_loss": 0.7978515625, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.066162109375, "rewards/margins": 0.10738830268383026, "rewards/rejected": -0.173583984375, "step": 11030 }, { "epoch": 0.8384279475982532, "grad_norm": 2.1983646954219465, "learning_rate": 7.613869876268809e-07, "log_odds_chosen": 1.5805175304412842, "log_odds_ratio": -0.4537109434604645, "logits/chosen": -1.2791016101837158, "logits/rejected": -1.0701172351837158, "logps/chosen": -0.7582031488418579, "logps/rejected": -1.935937523841858, "loss": 0.7996, "nll_loss": 0.775683581829071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07583007961511612, "rewards/margins": 0.11785583198070526, "rewards/rejected": -0.19370117783546448, "step": 11040 }, { "epoch": 0.8391873932029619, "grad_norm": 2.057667973541465, "learning_rate": 7.610423906471905e-07, "log_odds_chosen": 1.751367211341858, "log_odds_ratio": -0.4207519590854645, "logits/chosen": -1.173242211341858, "logits/rejected": -1.0910155773162842, "logps/chosen": -0.6815429925918579, "logps/rejected": -1.9812500476837158, "loss": 0.8122, "nll_loss": 0.785351574420929, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.068115234375, "rewards/margins": 0.12998047471046448, "rewards/rejected": -0.19809570908546448, "step": 11050 }, { "epoch": 0.8399468388076704, "grad_norm": 2.016305414572827, "learning_rate": 7.606982611289639e-07, "log_odds_chosen": 1.7158081531524658, "log_odds_ratio": -0.3609375059604645, "logits/chosen": -1.4406249523162842, "logits/rejected": -1.166601538658142, "logps/chosen": -0.6558593511581421, "logps/rejected": -1.885351538658142, "loss": 0.8146, "nll_loss": 0.759570300579071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06560058891773224, "rewards/margins": 0.12304077297449112, "rewards/rejected": -0.18869629502296448, "step": 11060 }, { "epoch": 0.8407062844123789, "grad_norm": 1.9695488049291192, "learning_rate": 7.60354598016268e-07, "log_odds_chosen": 1.42578125, "log_odds_ratio": -0.40532225370407104, "logits/chosen": -1.3095703125, "logits/rejected": -1.0849609375, "logps/chosen": -0.7445312738418579, "logps/rejected": -1.789453148841858, "loss": 0.8388, "nll_loss": 0.8480468988418579, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07452392578125, "rewards/margins": 0.10444946587085724, "rewards/rejected": -0.1788330078125, "step": 11070 }, { "epoch": 0.8414657300170876, "grad_norm": 2.2460268664922047, "learning_rate": 7.600114002565063e-07, "log_odds_chosen": 1.3542969226837158, "log_odds_ratio": -0.455322265625, "logits/chosen": -1.3193359375, "logits/rejected": -1.201562523841858, "logps/chosen": -0.667187511920929, "logps/rejected": -1.6359374523162842, "loss": 0.8219, "nll_loss": 0.843945324420929, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.06669922173023224, "rewards/margins": 0.09682311862707138, "rewards/rejected": -0.16350097954273224, "step": 11080 }, { "epoch": 0.8422251756217961, "grad_norm": 1.6253079511863349, "learning_rate": 7.596686668004049e-07, "log_odds_chosen": 1.557031273841858, "log_odds_ratio": -0.40234375, "logits/chosen": -1.3162109851837158, "logits/rejected": -1.109960913658142, "logps/chosen": -0.6732422113418579, "logps/rejected": -1.798828125, "loss": 0.8059, "nll_loss": 0.7557617425918579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06730957329273224, "rewards/margins": 0.11260986328125, "rewards/rejected": -0.17998047173023224, "step": 11090 }, { "epoch": 0.8429846212265046, "grad_norm": 1.9227075683974308, "learning_rate": 7.593263966019991e-07, "log_odds_chosen": 1.721582055091858, "log_odds_ratio": -0.3907226622104645, "logits/chosen": -1.290624976158142, "logits/rejected": -1.0759766101837158, "logps/chosen": -0.662304699420929, "logps/rejected": -1.8781249523162842, "loss": 0.8139, "nll_loss": 0.717968761920929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.066162109375, "rewards/margins": 0.12158203125, "rewards/rejected": -0.18779297173023224, "step": 11100 }, { "epoch": 0.8437440668312132, "grad_norm": 2.3408562055689273, "learning_rate": 7.589845886186201e-07, "log_odds_chosen": 1.4849121570587158, "log_odds_ratio": -0.41948240995407104, "logits/chosen": -1.37109375, "logits/rejected": -1.1052734851837158, "logps/chosen": -0.722460925579071, "logps/rejected": -1.8396484851837158, "loss": 0.8108, "nll_loss": 0.8248046636581421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07218017429113388, "rewards/margins": 0.11176757514476776, "rewards/rejected": -0.18403320014476776, "step": 11110 }, { "epoch": 0.8445035124359218, "grad_norm": 1.9533293261580387, "learning_rate": 7.586432418108816e-07, "log_odds_chosen": 1.635351538658142, "log_odds_ratio": -0.4120117127895355, "logits/chosen": -1.257421851158142, "logits/rejected": -1.150781273841858, "logps/chosen": -0.693554699420929, "logps/rejected": -1.904882788658142, "loss": 0.8355, "nll_loss": 0.755664050579071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06939697265625, "rewards/margins": 0.12110595405101776, "rewards/rejected": -0.19040527939796448, "step": 11120 }, { "epoch": 0.8452629580406303, "grad_norm": 2.208359334093822, "learning_rate": 7.583023551426664e-07, "log_odds_chosen": 1.893652319908142, "log_odds_ratio": -0.3508544862270355, "logits/chosen": -1.242578148841858, "logits/rejected": -1.0734374523162842, "logps/chosen": -0.6958984136581421, "logps/rejected": -2.091796875, "loss": 0.8217, "nll_loss": 0.7796875238418579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06960449367761612, "rewards/margins": 0.13968506455421448, "rewards/rejected": -0.20900878310203552, "step": 11130 }, { "epoch": 0.8460224036453389, "grad_norm": 2.0049546847025574, "learning_rate": 7.579619275811138e-07, "log_odds_chosen": 1.61328125, "log_odds_ratio": -0.389892578125, "logits/chosen": -1.2443358898162842, "logits/rejected": -1.094140648841858, "logps/chosen": -0.685546875, "logps/rejected": -1.8624999523162842, "loss": 0.8241, "nll_loss": 0.8046875, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06853027641773224, "rewards/margins": 0.11773376166820526, "rewards/rejected": -0.18637695908546448, "step": 11140 }, { "epoch": 0.8467818492500475, "grad_norm": 1.7497887398360827, "learning_rate": 7.576219580966055e-07, "log_odds_chosen": 1.761438012123108, "log_odds_ratio": -0.3461669981479645, "logits/chosen": -1.2619140148162842, "logits/rejected": -1.147851586341858, "logps/chosen": -0.65966796875, "logps/rejected": -1.9519531726837158, "loss": 0.8037, "nll_loss": 0.729296863079071, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.06595458835363388, "rewards/margins": 0.12940827012062073, "rewards/rejected": -0.19545897841453552, "step": 11150 }, { "epoch": 0.847541294854756, "grad_norm": 1.761036904132655, "learning_rate": 7.57282445662753e-07, "log_odds_chosen": 1.3245728015899658, "log_odds_ratio": -0.43168944120407104, "logits/chosen": -1.4113280773162842, "logits/rejected": -1.222070336341858, "logps/chosen": -0.69580078125, "logps/rejected": -1.5935547351837158, "loss": 0.8196, "nll_loss": 0.7646484375, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06953124701976776, "rewards/margins": 0.08977661281824112, "rewards/rejected": -0.15937499701976776, "step": 11160 }, { "epoch": 0.8483007404594646, "grad_norm": 2.280271686099706, "learning_rate": 7.569433892563852e-07, "log_odds_chosen": 1.5837891101837158, "log_odds_ratio": -0.3680175840854645, "logits/chosen": -1.171289086341858, "logits/rejected": -1.065039038658142, "logps/chosen": -0.67431640625, "logps/rejected": -1.792578101158142, "loss": 0.8433, "nll_loss": 0.8880859613418579, "rewards/accuracies": 0.84375, "rewards/chosen": -0.06743164360523224, "rewards/margins": 0.11176757514476776, "rewards/rejected": -0.17924804985523224, "step": 11170 }, { "epoch": 0.8490601860641731, "grad_norm": 2.1135674023305406, "learning_rate": 7.566047878575343e-07, "log_odds_chosen": 1.6154296398162842, "log_odds_ratio": -0.3807617127895355, "logits/chosen": -1.26953125, "logits/rejected": -1.121679663658142, "logps/chosen": -0.696582019329071, "logps/rejected": -1.840234398841858, "loss": 0.7926, "nll_loss": 0.801562488079071, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06965331733226776, "rewards/margins": 0.11424560844898224, "rewards/rejected": -0.18403320014476776, "step": 11180 }, { "epoch": 0.8498196316688817, "grad_norm": 1.945724099238978, "learning_rate": 7.562666404494236e-07, "log_odds_chosen": 1.5110352039337158, "log_odds_ratio": -0.41645509004592896, "logits/chosen": -1.2726562023162842, "logits/rejected": -1.154882788658142, "logps/chosen": -0.7152343988418579, "logps/rejected": -1.8425781726837158, "loss": 0.8093, "nll_loss": 0.802539050579071, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07147216796875, "rewards/margins": 0.11285400390625, "rewards/rejected": -0.18430176377296448, "step": 11190 }, { "epoch": 0.8505790772735903, "grad_norm": 2.6008887031908197, "learning_rate": 7.559289460184543e-07, "log_odds_chosen": 1.787500023841858, "log_odds_ratio": -0.35053712129592896, "logits/chosen": -1.355859398841858, "logits/rejected": -1.1728515625, "logps/chosen": -0.63525390625, "logps/rejected": -1.888671875, "loss": 0.7975, "nll_loss": 0.690722644329071, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.063507080078125, "rewards/margins": 0.12554931640625, "rewards/rejected": -0.1888427734375, "step": 11200 }, { "epoch": 0.8513385228782988, "grad_norm": 2.3786339712326763, "learning_rate": 7.555917035541937e-07, "log_odds_chosen": 1.567968726158142, "log_odds_ratio": -0.396240234375, "logits/chosen": -1.3125, "logits/rejected": -1.1189453601837158, "logps/chosen": -0.691210925579071, "logps/rejected": -1.850000023841858, "loss": 0.7898, "nll_loss": 0.7494140863418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06911621242761612, "rewards/margins": 0.11590576171875, "rewards/rejected": -0.18491211533546448, "step": 11210 }, { "epoch": 0.8520979684830075, "grad_norm": 1.9146147819086947, "learning_rate": 7.552549120493609e-07, "log_odds_chosen": 1.6541016101837158, "log_odds_ratio": -0.389892578125, "logits/chosen": -1.3425781726837158, "logits/rejected": -1.148046851158142, "logps/chosen": -0.729296863079071, "logps/rejected": -1.9626953601837158, "loss": 0.8149, "nll_loss": 0.787792980670929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07290039211511612, "rewards/margins": 0.12343750149011612, "rewards/rejected": -0.19624023139476776, "step": 11220 }, { "epoch": 0.852857414087716, "grad_norm": 2.9043219257690778, "learning_rate": 7.549185704998158e-07, "log_odds_chosen": 1.6692383289337158, "log_odds_ratio": -0.4144531190395355, "logits/chosen": -1.2980468273162842, "logits/rejected": -1.115820288658142, "logps/chosen": -0.719042956829071, "logps/rejected": -1.9128906726837158, "loss": 0.7782, "nll_loss": 0.727246105670929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07192382961511612, "rewards/margins": 0.119384765625, "rewards/rejected": -0.19118651747703552, "step": 11230 }, { "epoch": 0.8536168596924245, "grad_norm": 2.3336198209881394, "learning_rate": 7.545826779045449e-07, "log_odds_chosen": 1.462304711341858, "log_odds_ratio": -0.41533201932907104, "logits/chosen": -1.296484351158142, "logits/rejected": -1.087499976158142, "logps/chosen": -0.704394519329071, "logps/rejected": -1.7492187023162842, "loss": 0.8417, "nll_loss": 0.842578113079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07041015475988388, "rewards/margins": 0.10434570163488388, "rewards/rejected": -0.1749267578125, "step": 11240 }, { "epoch": 0.854376305297133, "grad_norm": 2.938195498795707, "learning_rate": 7.542472332656506e-07, "log_odds_chosen": 1.5173828601837158, "log_odds_ratio": -0.4122070372104645, "logits/chosen": -1.1865234375, "logits/rejected": -1.014062523841858, "logps/chosen": -0.66943359375, "logps/rejected": -1.736914038658142, "loss": 0.809, "nll_loss": 0.712207019329071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06695556640625, "rewards/margins": 0.10684814304113388, "rewards/rejected": -0.17368164658546448, "step": 11250 }, { "epoch": 0.8551357509018417, "grad_norm": 1.881615977470265, "learning_rate": 7.539122355883373e-07, "log_odds_chosen": 1.574609398841858, "log_odds_ratio": -0.39213865995407104, "logits/chosen": -1.2121093273162842, "logits/rejected": -1.056054711341858, "logps/chosen": -0.691210925579071, "logps/rejected": -1.841406226158142, "loss": 0.7865, "nll_loss": 0.8326171636581421, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06911621242761612, "rewards/margins": 0.11501464992761612, "rewards/rejected": -0.18403320014476776, "step": 11260 }, { "epoch": 0.8558951965065502, "grad_norm": 3.03668071934163, "learning_rate": 7.535776838808995e-07, "log_odds_chosen": 1.8955078125, "log_odds_ratio": -0.35187989473342896, "logits/chosen": -1.2775390148162842, "logits/rejected": -1.067773461341858, "logps/chosen": -0.677441418170929, "logps/rejected": -2.0859375, "loss": 0.8027, "nll_loss": 0.7601562738418579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06773681938648224, "rewards/margins": 0.14083251357078552, "rewards/rejected": -0.20859375596046448, "step": 11270 }, { "epoch": 0.8566546421112587, "grad_norm": 2.0649439183095595, "learning_rate": 7.532435771547094e-07, "log_odds_chosen": 1.606835961341858, "log_odds_ratio": -0.37153321504592896, "logits/chosen": -1.2521483898162842, "logits/rejected": -1.1003906726837158, "logps/chosen": -0.637011706829071, "logps/rejected": -1.7429687976837158, "loss": 0.8039, "nll_loss": 0.746289074420929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06365966796875, "rewards/margins": 0.110595703125, "rewards/rejected": -0.17414550483226776, "step": 11280 }, { "epoch": 0.8574140877159674, "grad_norm": 2.4286201353412182, "learning_rate": 7.52909914424205e-07, "log_odds_chosen": 1.664648413658142, "log_odds_ratio": -0.384033203125, "logits/chosen": -1.2900390625, "logits/rejected": -1.1521484851837158, "logps/chosen": -0.6976562738418579, "logps/rejected": -1.913476586341858, "loss": 0.814, "nll_loss": 0.7925781011581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06975097954273224, "rewards/margins": 0.12153320014476776, "rewards/rejected": -0.19123534858226776, "step": 11290 }, { "epoch": 0.8581735333206759, "grad_norm": 2.223294426884464, "learning_rate": 7.525766947068777e-07, "log_odds_chosen": 1.7355468273162842, "log_odds_ratio": -0.4034667909145355, "logits/chosen": -1.2531249523162842, "logits/rejected": -1.1326172351837158, "logps/chosen": -0.724316418170929, "logps/rejected": -2.010546922683716, "loss": 0.8253, "nll_loss": 0.762499988079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.0723876953125, "rewards/margins": 0.12865599989891052, "rewards/rejected": -0.200927734375, "step": 11300 }, { "epoch": 0.8589329789253844, "grad_norm": 2.686165676761099, "learning_rate": 7.522439170232598e-07, "log_odds_chosen": 1.7537109851837158, "log_odds_ratio": -0.37958985567092896, "logits/chosen": -1.2208983898162842, "logits/rejected": -1.048437476158142, "logps/chosen": -0.68310546875, "logps/rejected": -1.9773437976837158, "loss": 0.8228, "nll_loss": 0.744433581829071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06838379055261612, "rewards/margins": 0.12932129204273224, "rewards/rejected": -0.1976318359375, "step": 11310 }, { "epoch": 0.859692424530093, "grad_norm": 2.2869931861001653, "learning_rate": 7.519115803969124e-07, "log_odds_chosen": 1.787500023841858, "log_odds_ratio": -0.37419432401657104, "logits/chosen": -1.2394530773162842, "logits/rejected": -1.087890625, "logps/chosen": -0.679882824420929, "logps/rejected": -2.016406297683716, "loss": 0.822, "nll_loss": 0.804492175579071, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06800536811351776, "rewards/margins": 0.13347168266773224, "rewards/rejected": -0.20156249403953552, "step": 11320 }, { "epoch": 0.8604518701348016, "grad_norm": 2.1598189465592914, "learning_rate": 7.515796838544139e-07, "log_odds_chosen": 1.7355468273162842, "log_odds_ratio": -0.3515380918979645, "logits/chosen": -1.221289038658142, "logits/rejected": -1.090234398841858, "logps/chosen": -0.6781250238418579, "logps/rejected": -1.9093749523162842, "loss": 0.7978, "nll_loss": 0.701855480670929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06783447414636612, "rewards/margins": 0.1229248046875, "rewards/rejected": -0.19086913764476776, "step": 11330 }, { "epoch": 0.8612113157395102, "grad_norm": 2.3830961209034265, "learning_rate": 7.51248226425348e-07, "log_odds_chosen": 1.6960937976837158, "log_odds_ratio": -0.37128907442092896, "logits/chosen": -1.3720703125, "logits/rejected": -1.187890648841858, "logps/chosen": -0.698535144329071, "logps/rejected": -1.93359375, "loss": 0.8288, "nll_loss": 0.7759765386581421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06988525390625, "rewards/margins": 0.12349853664636612, "rewards/rejected": -0.19340820610523224, "step": 11340 }, { "epoch": 0.8619707613442187, "grad_norm": 2.2498584051697, "learning_rate": 7.509172071422913e-07, "log_odds_chosen": 1.437402367591858, "log_odds_ratio": -0.47294920682907104, "logits/chosen": -1.191015601158142, "logits/rejected": -1.0966796875, "logps/chosen": -0.7391601800918579, "logps/rejected": -1.7937500476837158, "loss": 0.8432, "nll_loss": 0.8189452886581421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.073974609375, "rewards/margins": 0.10553588718175888, "rewards/rejected": -0.17937012016773224, "step": 11350 }, { "epoch": 0.8627302069489273, "grad_norm": 1.8219492658332292, "learning_rate": 7.505866250408015e-07, "log_odds_chosen": 1.7267577648162842, "log_odds_ratio": -0.35273438692092896, "logits/chosen": -1.3037109375, "logits/rejected": -1.0851562023162842, "logps/chosen": -0.711132824420929, "logps/rejected": -1.9890625476837158, "loss": 0.8307, "nll_loss": 0.8314453363418579, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.07105712592601776, "rewards/margins": 0.12797851860523224, "rewards/rejected": -0.19899901747703552, "step": 11360 }, { "epoch": 0.8634896525536359, "grad_norm": 2.2761318200653102, "learning_rate": 7.50256479159406e-07, "log_odds_chosen": 1.4787108898162842, "log_odds_ratio": -0.395751953125, "logits/chosen": -1.3212890625, "logits/rejected": -1.2068359851837158, "logps/chosen": -0.681835949420929, "logps/rejected": -1.7041015625, "loss": 0.8177, "nll_loss": 0.708789050579071, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06815185397863388, "rewards/margins": 0.10216064751148224, "rewards/rejected": -0.17036132514476776, "step": 11370 }, { "epoch": 0.8642490981583444, "grad_norm": 2.380824562563092, "learning_rate": 7.499267685395902e-07, "log_odds_chosen": 1.6027343273162842, "log_odds_ratio": -0.3902831971645355, "logits/chosen": -1.267187476158142, "logits/rejected": -1.101171851158142, "logps/chosen": -0.6768554449081421, "logps/rejected": -1.820703148841858, "loss": 0.8053, "nll_loss": 0.7657226324081421, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06766357272863388, "rewards/margins": 0.11441650241613388, "rewards/rejected": -0.18212890625, "step": 11380 }, { "epoch": 0.8650085437630529, "grad_norm": 1.6148030558296955, "learning_rate": 7.495974922257845e-07, "log_odds_chosen": 1.532812476158142, "log_odds_ratio": -0.4000000059604645, "logits/chosen": -1.290429711341858, "logits/rejected": -1.112695336341858, "logps/chosen": -0.662402331829071, "logps/rejected": -1.7316405773162842, "loss": 0.8149, "nll_loss": 0.752734363079071, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06619872897863388, "rewards/margins": 0.10689697414636612, "rewards/rejected": -0.17307129502296448, "step": 11390 }, { "epoch": 0.8657679893677616, "grad_norm": 1.8617699155600196, "learning_rate": 7.492686492653552e-07, "log_odds_chosen": 1.3976562023162842, "log_odds_ratio": -0.42597657442092896, "logits/chosen": -1.33203125, "logits/rejected": -1.1681640148162842, "logps/chosen": -0.71875, "logps/rejected": -1.709375023841858, "loss": 0.7911, "nll_loss": 0.769726574420929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07186279445886612, "rewards/margins": 0.09897460788488388, "rewards/rejected": -0.17087402939796448, "step": 11400 }, { "epoch": 0.8665274349724701, "grad_norm": 1.9192009729649415, "learning_rate": 7.489402387085902e-07, "log_odds_chosen": 1.670312523841858, "log_odds_ratio": -0.37529295682907104, "logits/chosen": -1.2384765148162842, "logits/rejected": -1.0021483898162842, "logps/chosen": -0.707812488079071, "logps/rejected": -1.963281273841858, "loss": 0.8158, "nll_loss": 0.8134765625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07075195014476776, "rewards/margins": 0.12540283799171448, "rewards/rejected": -0.19614258408546448, "step": 11410 }, { "epoch": 0.8672868805771786, "grad_norm": 1.9846263193874394, "learning_rate": 7.486122596086891e-07, "log_odds_chosen": 1.473413109779358, "log_odds_ratio": -0.4854492247104645, "logits/chosen": -1.2429687976837158, "logits/rejected": -1.105859398841858, "logps/chosen": -0.7308593988418579, "logps/rejected": -1.7947266101837158, "loss": 0.8129, "nll_loss": 0.760058581829071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07304687798023224, "rewards/margins": 0.10632934421300888, "rewards/rejected": -0.17946776747703552, "step": 11420 }, { "epoch": 0.8680463261818873, "grad_norm": 2.656228424819449, "learning_rate": 7.482847110217516e-07, "log_odds_chosen": 1.450097680091858, "log_odds_ratio": -0.4153808653354645, "logits/chosen": -1.338281273841858, "logits/rejected": -1.190039038658142, "logps/chosen": -0.7002929449081421, "logps/rejected": -1.6892578601837158, "loss": 0.8139, "nll_loss": 0.7900390625, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07001952826976776, "rewards/margins": 0.09885253757238388, "rewards/rejected": -0.1689453125, "step": 11430 }, { "epoch": 0.8688057717865958, "grad_norm": 2.394505840265801, "learning_rate": 7.479575920067657e-07, "log_odds_chosen": 1.6242187023162842, "log_odds_ratio": -0.41132813692092896, "logits/chosen": -1.2824218273162842, "logits/rejected": -1.144140601158142, "logps/chosen": -0.6973632574081421, "logps/rejected": -1.9005858898162842, "loss": 0.8257, "nll_loss": 0.770703136920929, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.0697021484375, "rewards/margins": 0.12011108547449112, "rewards/rejected": -0.18994140625, "step": 11440 }, { "epoch": 0.8695652173913043, "grad_norm": 1.87272772384297, "learning_rate": 7.476309016255964e-07, "log_odds_chosen": 1.729101538658142, "log_odds_ratio": -0.38041990995407104, "logits/chosen": -1.2449219226837158, "logits/rejected": -1.114648461341858, "logps/chosen": -0.633496105670929, "logps/rejected": -1.849218726158142, "loss": 0.7896, "nll_loss": 0.71826171875, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06331787258386612, "rewards/margins": 0.12160644680261612, "rewards/rejected": -0.18505859375, "step": 11450 }, { "epoch": 0.870324662996013, "grad_norm": 2.202409222995104, "learning_rate": 7.473046389429744e-07, "log_odds_chosen": 1.5050780773162842, "log_odds_ratio": -0.37944334745407104, "logits/chosen": -1.3308594226837158, "logits/rejected": -1.1632812023162842, "logps/chosen": -0.6519531011581421, "logps/rejected": -1.7121093273162842, "loss": 0.7982, "nll_loss": 0.7904297113418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06519775092601776, "rewards/margins": 0.10595703125, "rewards/rejected": -0.171142578125, "step": 11460 }, { "epoch": 0.8710841086007215, "grad_norm": 1.9807698207805806, "learning_rate": 7.469788030264852e-07, "log_odds_chosen": 1.5022461414337158, "log_odds_ratio": -0.39824217557907104, "logits/chosen": -1.4207031726837158, "logits/rejected": -1.164648413658142, "logps/chosen": -0.6922851800918579, "logps/rejected": -1.774999976158142, "loss": 0.7895, "nll_loss": 0.7459961175918579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06925048679113388, "rewards/margins": 0.10813598334789276, "rewards/rejected": -0.17751464247703552, "step": 11470 }, { "epoch": 0.87184355420543, "grad_norm": 1.992697412293918, "learning_rate": 7.466533929465574e-07, "log_odds_chosen": 1.6378905773162842, "log_odds_ratio": -0.38627928495407104, "logits/chosen": -1.4111328125, "logits/rejected": -1.2146484851837158, "logps/chosen": -0.657031238079071, "logps/rejected": -1.8039062023162842, "loss": 0.8106, "nll_loss": 0.720019519329071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06562499701976776, "rewards/margins": 0.11472167819738388, "rewards/rejected": -0.18039551377296448, "step": 11480 }, { "epoch": 0.8726029998101386, "grad_norm": 1.7110427474696397, "learning_rate": 7.463284077764519e-07, "log_odds_chosen": 1.423925757408142, "log_odds_ratio": -0.44111329317092896, "logits/chosen": -1.1798827648162842, "logits/rejected": -1.0242187976837158, "logps/chosen": -0.6851562261581421, "logps/rejected": -1.702539086341858, "loss": 0.7992, "nll_loss": 0.748828113079071, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.06854248046875, "rewards/margins": 0.1016845703125, "rewards/rejected": -0.17033691704273224, "step": 11490 }, { "epoch": 0.8733624454148472, "grad_norm": 2.1275103271359606, "learning_rate": 7.460038465922511e-07, "log_odds_chosen": 1.518652319908142, "log_odds_ratio": -0.41157227754592896, "logits/chosen": -1.2501952648162842, "logits/rejected": -1.1144530773162842, "logps/chosen": -0.6333984136581421, "logps/rejected": -1.740625023841858, "loss": 0.8189, "nll_loss": 0.76220703125, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.06324462592601776, "rewards/margins": 0.1107177734375, "rewards/rejected": -0.17391356825828552, "step": 11500 }, { "epoch": 0.8741218910195557, "grad_norm": 1.7625070326176555, "learning_rate": 7.456797084728466e-07, "log_odds_chosen": 1.9289062023162842, "log_odds_ratio": -0.35502928495407104, "logits/chosen": -1.353515625, "logits/rejected": -1.1083984375, "logps/chosen": -0.7137695550918579, "logps/rejected": -2.164257764816284, "loss": 0.7969, "nll_loss": 0.797167956829071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07137451320886612, "rewards/margins": 0.14505615830421448, "rewards/rejected": -0.21645507216453552, "step": 11510 }, { "epoch": 0.8748813366242643, "grad_norm": 2.268541990817986, "learning_rate": 7.453559924999299e-07, "log_odds_chosen": 1.5872070789337158, "log_odds_ratio": -0.3995117247104645, "logits/chosen": -1.2492187023162842, "logits/rejected": -1.0388672351837158, "logps/chosen": -0.676562488079071, "logps/rejected": -1.845117211341858, "loss": 0.7832, "nll_loss": 0.759765625, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06757812201976776, "rewards/margins": 0.11683349311351776, "rewards/rejected": -0.18442383408546448, "step": 11520 }, { "epoch": 0.8756407822289729, "grad_norm": 2.296770033684267, "learning_rate": 7.450326977579804e-07, "log_odds_chosen": 1.826757788658142, "log_odds_ratio": -0.368408203125, "logits/chosen": -1.3068358898162842, "logits/rejected": -1.1572265625, "logps/chosen": -0.6434570550918579, "logps/rejected": -1.997460961341858, "loss": 0.798, "nll_loss": 0.7538086175918579, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06439208984375, "rewards/margins": 0.13536377251148224, "rewards/rejected": -0.1998291015625, "step": 11530 }, { "epoch": 0.8764002278336814, "grad_norm": 2.078521552779195, "learning_rate": 7.447098233342549e-07, "log_odds_chosen": 1.579492211341858, "log_odds_ratio": -0.39042967557907104, "logits/chosen": -1.286718726158142, "logits/rejected": -1.1150391101837158, "logps/chosen": -0.6573241949081421, "logps/rejected": -1.771093726158142, "loss": 0.8207, "nll_loss": 0.8011718988418579, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06574706733226776, "rewards/margins": 0.1114501953125, "rewards/rejected": -0.17719726264476776, "step": 11540 }, { "epoch": 0.87715967343839, "grad_norm": 2.2449616353500845, "learning_rate": 7.443873683187767e-07, "log_odds_chosen": 1.538964867591858, "log_odds_ratio": -0.39692384004592896, "logits/chosen": -1.2253906726837158, "logits/rejected": -0.971875011920929, "logps/chosen": -0.700390636920929, "logps/rejected": -1.819921851158142, "loss": 0.7995, "nll_loss": 0.789355456829071, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06998290866613388, "rewards/margins": 0.11186523735523224, "rewards/rejected": -0.181884765625, "step": 11550 }, { "epoch": 0.8779191190430985, "grad_norm": 1.8281085330624094, "learning_rate": 7.440653318043245e-07, "log_odds_chosen": 1.72998046875, "log_odds_ratio": -0.3983398377895355, "logits/chosen": -1.2371094226837158, "logits/rejected": -1.0496094226837158, "logps/chosen": -0.67578125, "logps/rejected": -1.990234375, "loss": 0.7892, "nll_loss": 0.767871081829071, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06761474907398224, "rewards/margins": 0.1314956694841385, "rewards/rejected": -0.19921875, "step": 11560 }, { "epoch": 0.8786785646478071, "grad_norm": 2.6828555965637504, "learning_rate": 7.437437128864224e-07, "log_odds_chosen": 1.458703637123108, "log_odds_ratio": -0.41650390625, "logits/chosen": -1.2892577648162842, "logits/rejected": -1.126367211341858, "logps/chosen": -0.712890625, "logps/rejected": -1.770898461341858, "loss": 0.7813, "nll_loss": 0.753710925579071, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07124023139476776, "rewards/margins": 0.1057891845703125, "rewards/rejected": -0.17697754502296448, "step": 11570 }, { "epoch": 0.8794380102525157, "grad_norm": 2.10068043041169, "learning_rate": 7.434225106633287e-07, "log_odds_chosen": 1.7990233898162842, "log_odds_ratio": -0.36821287870407104, "logits/chosen": -1.313085913658142, "logits/rejected": -1.119726538658142, "logps/chosen": -0.6856445074081421, "logps/rejected": -2.022656202316284, "loss": 0.7996, "nll_loss": 0.783886730670929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06854248046875, "rewards/margins": 0.13359375298023224, "rewards/rejected": -0.20205077528953552, "step": 11580 }, { "epoch": 0.8801974558572242, "grad_norm": 2.218057636136923, "learning_rate": 7.431017242360253e-07, "log_odds_chosen": 1.916015625, "log_odds_ratio": -0.33649903535842896, "logits/chosen": -1.2169921398162842, "logits/rejected": -1.047460913658142, "logps/chosen": -0.66455078125, "logps/rejected": -2.1148438453674316, "loss": 0.8209, "nll_loss": 0.830078125, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06643066555261612, "rewards/margins": 0.1448974609375, "rewards/rejected": -0.21137695014476776, "step": 11590 }, { "epoch": 0.8809569014619328, "grad_norm": 2.3687606452734253, "learning_rate": 7.427813527082074e-07, "log_odds_chosen": 1.6553955078125, "log_odds_ratio": -0.3995605409145355, "logits/chosen": -1.3185546398162842, "logits/rejected": -1.1064453125, "logps/chosen": -0.70166015625, "logps/rejected": -1.9099609851837158, "loss": 0.8143, "nll_loss": 0.7713867425918579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07022704929113388, "rewards/margins": 0.12092895805835724, "rewards/rejected": -0.19101563096046448, "step": 11600 }, { "epoch": 0.8817163470666414, "grad_norm": 2.3017407594396424, "learning_rate": 7.424613951862727e-07, "log_odds_chosen": 1.4977538585662842, "log_odds_ratio": -0.422119140625, "logits/chosen": -1.298828125, "logits/rejected": -1.1052734851837158, "logps/chosen": -0.692578136920929, "logps/rejected": -1.776953101158142, "loss": 0.7866, "nll_loss": 0.769335925579071, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06923828274011612, "rewards/margins": 0.10841675102710724, "rewards/rejected": -0.17753906548023224, "step": 11610 }, { "epoch": 0.8824757926713499, "grad_norm": 2.1326528778072986, "learning_rate": 7.42141850779311e-07, "log_odds_chosen": 1.826171875, "log_odds_ratio": -0.334716796875, "logits/chosen": -1.33203125, "logits/rejected": -1.112695336341858, "logps/chosen": -0.681933581829071, "logps/rejected": -1.9597656726837158, "loss": 0.8033, "nll_loss": 0.771679699420929, "rewards/accuracies": 0.84375, "rewards/chosen": -0.06821288913488388, "rewards/margins": 0.12785644829273224, "rewards/rejected": -0.19589844346046448, "step": 11620 }, { "epoch": 0.8832352382760584, "grad_norm": 1.9554949543204003, "learning_rate": 7.418227185990941e-07, "log_odds_chosen": 1.566992163658142, "log_odds_ratio": -0.3750976622104645, "logits/chosen": -1.4626953601837158, "logits/rejected": -1.243554711341858, "logps/chosen": -0.68505859375, "logps/rejected": -1.810546875, "loss": 0.8047, "nll_loss": 0.7767578363418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06850586086511612, "rewards/margins": 0.11258544772863388, "rewards/rejected": -0.18095703423023224, "step": 11630 }, { "epoch": 0.8839946838807671, "grad_norm": 2.328220028731324, "learning_rate": 7.415039977600647e-07, "log_odds_chosen": 1.524438500404358, "log_odds_ratio": -0.42106932401657104, "logits/chosen": -1.2537109851837158, "logits/rejected": -1.023828148841858, "logps/chosen": -0.6673828363418579, "logps/rejected": -1.794921875, "loss": 0.8193, "nll_loss": 0.829296886920929, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06672362983226776, "rewards/margins": 0.11277465522289276, "rewards/rejected": -0.17946776747703552, "step": 11640 }, { "epoch": 0.8847541294854756, "grad_norm": 2.5694469804089417, "learning_rate": 7.411856873793271e-07, "log_odds_chosen": 1.5935547351837158, "log_odds_ratio": -0.39697265625, "logits/chosen": -1.365234375, "logits/rejected": -1.1544921398162842, "logps/chosen": -0.733593761920929, "logps/rejected": -1.899999976158142, "loss": 0.7973, "nll_loss": 0.810742199420929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07340087741613388, "rewards/margins": 0.11651611328125, "rewards/rejected": -0.18989257514476776, "step": 11650 }, { "epoch": 0.8855135750901841, "grad_norm": 1.7739601750496423, "learning_rate": 7.408677865766361e-07, "log_odds_chosen": 1.694433569908142, "log_odds_ratio": -0.4245849549770355, "logits/chosen": -1.1560547351837158, "logits/rejected": -1.0242187976837158, "logps/chosen": -0.6998046636581421, "logps/rejected": -1.939843773841858, "loss": 0.8253, "nll_loss": 0.76611328125, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06993408501148224, "rewards/margins": 0.12401123344898224, "rewards/rejected": -0.19404296576976776, "step": 11660 }, { "epoch": 0.8862730206948928, "grad_norm": 1.6470596014774352, "learning_rate": 7.405502944743868e-07, "log_odds_chosen": 1.544335961341858, "log_odds_ratio": -0.37939453125, "logits/chosen": -1.2800781726837158, "logits/rejected": -1.0880858898162842, "logps/chosen": -0.624316394329071, "logps/rejected": -1.7234375476837158, "loss": 0.8251, "nll_loss": 0.7734375, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06251220405101776, "rewards/margins": 0.10975341498851776, "rewards/rejected": -0.17216797173023224, "step": 11670 }, { "epoch": 0.8870324662996013, "grad_norm": 1.9013044750274686, "learning_rate": 7.402332101976052e-07, "log_odds_chosen": 1.953222632408142, "log_odds_ratio": -0.33625489473342896, "logits/chosen": -1.2111327648162842, "logits/rejected": -1.0242187976837158, "logps/chosen": -0.669140636920929, "logps/rejected": -2.123828172683716, "loss": 0.8071, "nll_loss": 0.7525390386581421, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06696777045726776, "rewards/margins": 0.1455078125, "rewards/rejected": -0.21254882216453552, "step": 11680 }, { "epoch": 0.8877919119043098, "grad_norm": 2.0488022617343344, "learning_rate": 7.399165328739372e-07, "log_odds_chosen": 1.5656249523162842, "log_odds_ratio": -0.40180665254592896, "logits/chosen": -1.2683594226837158, "logits/rejected": -1.1328125, "logps/chosen": -0.705859363079071, "logps/rejected": -1.846093773841858, "loss": 0.7931, "nll_loss": 0.739550769329071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.07061767578125, "rewards/margins": 0.11394043266773224, "rewards/rejected": -0.18459472060203552, "step": 11690 }, { "epoch": 0.8885513575090184, "grad_norm": 1.6543565003623768, "learning_rate": 7.396002616336387e-07, "log_odds_chosen": 1.581640601158142, "log_odds_ratio": -0.39067381620407104, "logits/chosen": -1.3193359375, "logits/rejected": -1.1335937976837158, "logps/chosen": -0.692578136920929, "logps/rejected": -1.8505859375, "loss": 0.801, "nll_loss": 0.7625976800918579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06927490234375, "rewards/margins": 0.11583862453699112, "rewards/rejected": -0.18500976264476776, "step": 11700 }, { "epoch": 0.889310803113727, "grad_norm": 2.1714898039929516, "learning_rate": 7.392843956095663e-07, "log_odds_chosen": 1.9013671875, "log_odds_ratio": -0.35624998807907104, "logits/chosen": -1.2931640148162842, "logits/rejected": -1.1064453125, "logps/chosen": -0.6416991949081421, "logps/rejected": -2.046093702316284, "loss": 0.7817, "nll_loss": 0.7188476324081421, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06416015326976776, "rewards/margins": 0.14046630263328552, "rewards/rejected": -0.20449218153953552, "step": 11710 }, { "epoch": 0.8900702487184355, "grad_norm": 1.7700712147158542, "learning_rate": 7.389689339371664e-07, "log_odds_chosen": 1.5115234851837158, "log_odds_ratio": -0.3954101502895355, "logits/chosen": -1.2648437023162842, "logits/rejected": -1.0714843273162842, "logps/chosen": -0.679394543170929, "logps/rejected": -1.789648413658142, "loss": 0.8234, "nll_loss": 0.8154296875, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06790771335363388, "rewards/margins": 0.110992431640625, "rewards/rejected": -0.17910155653953552, "step": 11720 }, { "epoch": 0.8908296943231441, "grad_norm": 1.9263959705703237, "learning_rate": 7.386538757544653e-07, "log_odds_chosen": 1.326806664466858, "log_odds_ratio": -0.4700683653354645, "logits/chosen": -1.248437523841858, "logits/rejected": -1.105859398841858, "logps/chosen": -0.675976574420929, "logps/rejected": -1.666406273841858, "loss": 0.8104, "nll_loss": 0.7681640386581421, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.06755371391773224, "rewards/margins": 0.09906921535730362, "rewards/rejected": -0.1666259765625, "step": 11730 }, { "epoch": 0.8915891399278527, "grad_norm": 3.699985863400496, "learning_rate": 7.3833922020206e-07, "log_odds_chosen": 1.851953148841858, "log_odds_ratio": -0.37993162870407104, "logits/chosen": -1.363867163658142, "logits/rejected": -1.0986328125, "logps/chosen": -0.686718761920929, "logps/rejected": -2.037890672683716, "loss": 0.8136, "nll_loss": 0.739062488079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06862793117761612, "rewards/margins": 0.13520507514476776, "rewards/rejected": -0.203857421875, "step": 11740 }, { "epoch": 0.8923485855325612, "grad_norm": 1.9830477633793726, "learning_rate": 7.38024966423108e-07, "log_odds_chosen": 1.770117163658142, "log_odds_ratio": -0.3531738221645355, "logits/chosen": -1.2371094226837158, "logits/rejected": -1.07421875, "logps/chosen": -0.703906238079071, "logps/rejected": -2.017578125, "loss": 0.8015, "nll_loss": 0.8023437261581421, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07037353515625, "rewards/margins": 0.13118895888328552, "rewards/rejected": -0.201416015625, "step": 11750 }, { "epoch": 0.8931080311372698, "grad_norm": 3.0199265863323763, "learning_rate": 7.377111135633174e-07, "log_odds_chosen": 1.7019531726837158, "log_odds_ratio": -0.3568359315395355, "logits/chosen": -1.2615234851837158, "logits/rejected": -1.087499976158142, "logps/chosen": -0.7021484375, "logps/rejected": -1.976171851158142, "loss": 0.806, "nll_loss": 0.7845703363418579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07022704929113388, "rewards/margins": 0.12736816704273224, "rewards/rejected": -0.19770507514476776, "step": 11760 }, { "epoch": 0.8938674767419783, "grad_norm": 3.135525458890862, "learning_rate": 7.373976607709372e-07, "log_odds_chosen": 1.6052734851837158, "log_odds_ratio": -0.37309569120407104, "logits/chosen": -1.2488281726837158, "logits/rejected": -1.105859398841858, "logps/chosen": -0.672558605670929, "logps/rejected": -1.824609398841858, "loss": 0.8111, "nll_loss": 0.8077148199081421, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.0672607421875, "rewards/margins": 0.11512450873851776, "rewards/rejected": -0.1822509765625, "step": 11770 }, { "epoch": 0.894626922346687, "grad_norm": 1.773554126006498, "learning_rate": 7.370846071967476e-07, "log_odds_chosen": 1.647070288658142, "log_odds_ratio": -0.3744873106479645, "logits/chosen": -1.3708984851837158, "logits/rejected": -1.111328125, "logps/chosen": -0.6998046636581421, "logps/rejected": -1.9052734375, "loss": 0.7972, "nll_loss": 0.723339855670929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06995849311351776, "rewards/margins": 0.12054443359375, "rewards/rejected": -0.19057616591453552, "step": 11780 }, { "epoch": 0.8953863679513955, "grad_norm": 2.383571841417671, "learning_rate": 7.367719519940501e-07, "log_odds_chosen": 1.4619140625, "log_odds_ratio": -0.43745118379592896, "logits/chosen": -1.307226538658142, "logits/rejected": -1.0546875, "logps/chosen": -0.7035156488418579, "logps/rejected": -1.744140625, "loss": 0.782, "nll_loss": 0.7032226324081421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07042236626148224, "rewards/margins": 0.10419311374425888, "rewards/rejected": -0.17458495497703552, "step": 11790 }, { "epoch": 0.896145813556104, "grad_norm": 1.746209162306085, "learning_rate": 7.364596943186587e-07, "log_odds_chosen": 1.673437476158142, "log_odds_ratio": -0.384033203125, "logits/chosen": -1.230859398841858, "logits/rejected": -1.1355469226837158, "logps/chosen": -0.751953125, "logps/rejected": -1.9773437976837158, "loss": 0.8279, "nll_loss": 0.8633788824081421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07521972805261612, "rewards/margins": 0.1226806640625, "rewards/rejected": -0.19790038466453552, "step": 11800 }, { "epoch": 0.8969052591608127, "grad_norm": 2.012769798224492, "learning_rate": 7.36147833328889e-07, "log_odds_chosen": 1.743554711341858, "log_odds_ratio": -0.36669921875, "logits/chosen": -1.320898413658142, "logits/rejected": -1.15234375, "logps/chosen": -0.663281261920929, "logps/rejected": -1.9578125476837158, "loss": 0.799, "nll_loss": 0.8177734613418579, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06632079929113388, "rewards/margins": 0.12941893935203552, "rewards/rejected": -0.19570311903953552, "step": 11810 }, { "epoch": 0.8976647047655212, "grad_norm": 2.675498130671647, "learning_rate": 7.358363681855503e-07, "log_odds_chosen": 1.677148461341858, "log_odds_ratio": -0.40620118379592896, "logits/chosen": -1.333593726158142, "logits/rejected": -1.098046898841858, "logps/chosen": -0.764453113079071, "logps/rejected": -2.055859327316284, "loss": 0.8163, "nll_loss": 0.8011718988418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.076416015625, "rewards/margins": 0.12924805283546448, "rewards/rejected": -0.20554199814796448, "step": 11820 }, { "epoch": 0.8984241503702297, "grad_norm": 2.2473226101392925, "learning_rate": 7.355252980519345e-07, "log_odds_chosen": 1.7565429210662842, "log_odds_ratio": -0.4131835997104645, "logits/chosen": -1.215234398841858, "logits/rejected": -1.0330078601837158, "logps/chosen": -0.7645508050918579, "logps/rejected": -2.096874952316284, "loss": 0.8181, "nll_loss": 0.8705078363418579, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07647705078125, "rewards/margins": 0.13332518935203552, "rewards/rejected": -0.20974120497703552, "step": 11830 }, { "epoch": 0.8991835959749382, "grad_norm": 2.1782558729945802, "learning_rate": 7.352146220938078e-07, "log_odds_chosen": 1.7136719226837158, "log_odds_ratio": -0.41179198026657104, "logits/chosen": -1.244531273841858, "logits/rejected": -1.109765648841858, "logps/chosen": -0.650585949420929, "logps/rejected": -1.8478515148162842, "loss": 0.7907, "nll_loss": 0.6942383050918579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06507568061351776, "rewards/margins": 0.11989746242761612, "rewards/rejected": -0.18476562201976776, "step": 11840 }, { "epoch": 0.8999430415796469, "grad_norm": 1.9152108793418725, "learning_rate": 7.349043394794005e-07, "log_odds_chosen": 1.6194336414337158, "log_odds_ratio": -0.406494140625, "logits/chosen": -1.3117187023162842, "logits/rejected": -1.1355469226837158, "logps/chosen": -0.702343761920929, "logps/rejected": -1.8644530773162842, "loss": 0.7938, "nll_loss": 0.7254883050918579, "rewards/accuracies": 0.75, "rewards/chosen": -0.07020263373851776, "rewards/margins": 0.11627807468175888, "rewards/rejected": -0.1864013671875, "step": 11850 }, { "epoch": 0.9007024871843554, "grad_norm": 2.0475354961717414, "learning_rate": 7.345944493793987e-07, "log_odds_chosen": 1.534814476966858, "log_odds_ratio": -0.4544921815395355, "logits/chosen": -1.241601586341858, "logits/rejected": -1.134374976158142, "logps/chosen": -0.632617175579071, "logps/rejected": -1.7646484375, "loss": 0.7735, "nll_loss": 0.731738269329071, "rewards/accuracies": 0.75, "rewards/chosen": -0.06326904147863388, "rewards/margins": 0.11318359524011612, "rewards/rejected": -0.17648926377296448, "step": 11860 }, { "epoch": 0.901461932789064, "grad_norm": 2.2257316714217765, "learning_rate": 7.342849509669337e-07, "log_odds_chosen": 1.545556664466858, "log_odds_ratio": -0.46269530057907104, "logits/chosen": -1.2527344226837158, "logits/rejected": -1.041015625, "logps/chosen": -0.7025390863418579, "logps/rejected": -1.8234374523162842, "loss": 0.798, "nll_loss": 0.700976550579071, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0701904296875, "rewards/margins": 0.11208343505859375, "rewards/rejected": -0.18232421576976776, "step": 11870 }, { "epoch": 0.9022213783937726, "grad_norm": 1.8672918645809016, "learning_rate": 7.339758434175737e-07, "log_odds_chosen": 1.509619116783142, "log_odds_ratio": -0.4093261659145355, "logits/chosen": -1.3125, "logits/rejected": -1.0947265625, "logps/chosen": -0.680859386920929, "logps/rejected": -1.73828125, "loss": 0.796, "nll_loss": 0.7469726800918579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06809081882238388, "rewards/margins": 0.10559692233800888, "rewards/rejected": -0.17377929389476776, "step": 11880 }, { "epoch": 0.9029808239984811, "grad_norm": 2.265144038491008, "learning_rate": 7.336671259093143e-07, "log_odds_chosen": 1.922460913658142, "log_odds_ratio": -0.3554931581020355, "logits/chosen": -1.300390601158142, "logits/rejected": -1.1203124523162842, "logps/chosen": -0.621777355670929, "logps/rejected": -2.028515577316284, "loss": 0.8117, "nll_loss": 0.778515636920929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06217040866613388, "rewards/margins": 0.14058837294578552, "rewards/rejected": -0.20268554985523224, "step": 11890 }, { "epoch": 0.9037402696031896, "grad_norm": 2.262601464153992, "learning_rate": 7.33358797622569e-07, "log_odds_chosen": 1.879492163658142, "log_odds_ratio": -0.3714843690395355, "logits/chosen": -1.164453148841858, "logits/rejected": -1.0701172351837158, "logps/chosen": -0.6834961175918579, "logps/rejected": -2.0550780296325684, "loss": 0.7771, "nll_loss": 0.7318359613418579, "rewards/accuracies": 0.78125, "rewards/chosen": -0.068359375, "rewards/margins": 0.13730469346046448, "rewards/rejected": -0.20556640625, "step": 11900 }, { "epoch": 0.9044997152078982, "grad_norm": 2.311064606037882, "learning_rate": 7.330508577401606e-07, "log_odds_chosen": 1.6693847179412842, "log_odds_ratio": -0.37651365995407104, "logits/chosen": -1.4013671875, "logits/rejected": -1.107812523841858, "logps/chosen": -0.686718761920929, "logps/rejected": -1.8974609375, "loss": 0.7825, "nll_loss": 0.747265636920929, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06873778998851776, "rewards/margins": 0.12087402492761612, "rewards/rejected": -0.18967285752296448, "step": 11910 }, { "epoch": 0.9052591608126068, "grad_norm": 2.030899460020699, "learning_rate": 7.327433054473117e-07, "log_odds_chosen": 1.34033203125, "log_odds_ratio": -0.4202636778354645, "logits/chosen": -1.281835913658142, "logits/rejected": -1.0888671875, "logps/chosen": -0.7017577886581421, "logps/rejected": -1.674218773841858, "loss": 0.8206, "nll_loss": 0.809374988079071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.0701904296875, "rewards/margins": 0.09718780219554901, "rewards/rejected": -0.16738280653953552, "step": 11920 }, { "epoch": 0.9060186064173154, "grad_norm": 2.054374613574247, "learning_rate": 7.324361399316357e-07, "log_odds_chosen": 1.671484351158142, "log_odds_ratio": -0.3556152284145355, "logits/chosen": -1.276953101158142, "logits/rejected": -1.061914086341858, "logps/chosen": -0.67138671875, "logps/rejected": -1.8312499523162842, "loss": 0.8186, "nll_loss": 0.7861328125, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.067138671875, "rewards/margins": 0.11611328274011612, "rewards/rejected": -0.183349609375, "step": 11930 }, { "epoch": 0.9067780520220239, "grad_norm": 2.301784715254483, "learning_rate": 7.321293603831281e-07, "log_odds_chosen": 1.5427734851837158, "log_odds_ratio": -0.398681640625, "logits/chosen": -1.335546851158142, "logits/rejected": -1.1242187023162842, "logps/chosen": -0.659960925579071, "logps/rejected": -1.715429663658142, "loss": 0.7877, "nll_loss": 0.70947265625, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06602783501148224, "rewards/margins": 0.10555420070886612, "rewards/rejected": -0.1715087890625, "step": 11940 }, { "epoch": 0.9075374976267325, "grad_norm": 1.920440182850682, "learning_rate": 7.318229659941572e-07, "log_odds_chosen": 1.923730492591858, "log_odds_ratio": -0.3512206971645355, "logits/chosen": -1.314453125, "logits/rejected": -1.1423828601837158, "logps/chosen": -0.65771484375, "logps/rejected": -2.0941405296325684, "loss": 0.7872, "nll_loss": 0.7291015386581421, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06574706733226776, "rewards/margins": 0.14384765923023224, "rewards/rejected": -0.20961913466453552, "step": 11950 }, { "epoch": 0.9082969432314411, "grad_norm": 2.613957472721097, "learning_rate": 7.315169559594551e-07, "log_odds_chosen": 1.773828148841858, "log_odds_ratio": -0.38969725370407104, "logits/chosen": -1.3292968273162842, "logits/rejected": -1.16796875, "logps/chosen": -0.7298828363418579, "logps/rejected": -2.0445313453674316, "loss": 0.825, "nll_loss": 0.831835925579071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07298584282398224, "rewards/margins": 0.13133545219898224, "rewards/rejected": -0.2044677734375, "step": 11960 }, { "epoch": 0.9090563888361496, "grad_norm": 2.717051144415763, "learning_rate": 7.31211329476109e-07, "log_odds_chosen": 1.5238769054412842, "log_odds_ratio": -0.37592774629592896, "logits/chosen": -1.2667968273162842, "logits/rejected": -1.145898461341858, "logps/chosen": -0.633007824420929, "logps/rejected": -1.656640648841858, "loss": 0.7988, "nll_loss": 0.7353515625, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.06333007663488388, "rewards/margins": 0.10235901176929474, "rewards/rejected": -0.16582031548023224, "step": 11970 }, { "epoch": 0.9098158344408582, "grad_norm": 2.970009416161235, "learning_rate": 7.309060857435526e-07, "log_odds_chosen": 1.943457007408142, "log_odds_ratio": -0.3319091796875, "logits/chosen": -1.3068358898162842, "logits/rejected": -1.1359374523162842, "logps/chosen": -0.665722668170929, "logps/rejected": -2.111523389816284, "loss": 0.8015, "nll_loss": 0.752148449420929, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06651611626148224, "rewards/margins": 0.144744873046875, "rewards/rejected": -0.211181640625, "step": 11980 }, { "epoch": 0.9105752800455668, "grad_norm": 2.264522728037632, "learning_rate": 7.30601223963557e-07, "log_odds_chosen": 1.62353515625, "log_odds_ratio": -0.3958984315395355, "logits/chosen": -1.283593773841858, "logits/rejected": -1.109375, "logps/chosen": -0.7138671875, "logps/rejected": -1.92578125, "loss": 0.8102, "nll_loss": 0.7642577886581421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07147216796875, "rewards/margins": 0.121246337890625, "rewards/rejected": -0.19267578423023224, "step": 11990 }, { "epoch": 0.9113347256502753, "grad_norm": 2.567843138205162, "learning_rate": 7.302967433402214e-07, "log_odds_chosen": 1.706640601158142, "log_odds_ratio": -0.32451170682907104, "logits/chosen": -1.2468750476837158, "logits/rejected": -1.115625023841858, "logps/chosen": -0.6787109375, "logps/rejected": -1.9152343273162842, "loss": 0.8006, "nll_loss": 0.7373046875, "rewards/accuracies": 0.8687499761581421, "rewards/chosen": -0.06787109375, "rewards/margins": 0.12340088188648224, "rewards/rejected": -0.19140625, "step": 12000 }, { "epoch": 0.9120941712549838, "grad_norm": 2.1593394706767906, "learning_rate": 7.299926430799657e-07, "log_odds_chosen": 1.7234375476837158, "log_odds_ratio": -0.3976806700229645, "logits/chosen": -1.3259766101837158, "logits/rejected": -1.150390625, "logps/chosen": -0.700878918170929, "logps/rejected": -2.0, "loss": 0.7961, "nll_loss": 0.6917968988418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07005615532398224, "rewards/margins": 0.13017578423023224, "rewards/rejected": -0.20046386122703552, "step": 12010 }, { "epoch": 0.9128536168596925, "grad_norm": 1.7079206169699177, "learning_rate": 7.296889223915205e-07, "log_odds_chosen": 1.5422852039337158, "log_odds_ratio": -0.3807373046875, "logits/chosen": -1.306054711341858, "logits/rejected": -1.1378905773162842, "logps/chosen": -0.7109375, "logps/rejected": -1.841796875, "loss": 0.8002, "nll_loss": 0.8345702886581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07099609076976776, "rewards/margins": 0.11323241889476776, "rewards/rejected": -0.18430176377296448, "step": 12020 }, { "epoch": 0.913613062464401, "grad_norm": 1.8947844305105526, "learning_rate": 7.293855804859192e-07, "log_odds_chosen": 1.9304687976837158, "log_odds_ratio": -0.35173338651657104, "logits/chosen": -1.3083984851837158, "logits/rejected": -1.097070336341858, "logps/chosen": -0.6722656488418579, "logps/rejected": -2.1011719703674316, "loss": 0.7912, "nll_loss": 0.7916015386581421, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06721191108226776, "rewards/margins": 0.14300537109375, "rewards/rejected": -0.21015624701976776, "step": 12030 }, { "epoch": 0.9143725080691095, "grad_norm": 2.7520723985837248, "learning_rate": 7.290826165764892e-07, "log_odds_chosen": 1.6843750476837158, "log_odds_ratio": -0.40849608182907104, "logits/chosen": -1.2048828601837158, "logits/rejected": -1.0603516101837158, "logps/chosen": -0.7220703363418579, "logps/rejected": -1.9609375, "loss": 0.794, "nll_loss": 0.7275390625, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07227782905101776, "rewards/margins": 0.12391357123851776, "rewards/rejected": -0.1961669921875, "step": 12040 }, { "epoch": 0.9151319536738182, "grad_norm": 2.71502480501057, "learning_rate": 7.28780029878843e-07, "log_odds_chosen": 1.65966796875, "log_odds_ratio": -0.3983154296875, "logits/chosen": -1.269921898841858, "logits/rejected": -1.098046898841858, "logps/chosen": -0.701367199420929, "logps/rejected": -1.9070312976837158, "loss": 0.8217, "nll_loss": 0.783203125, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07016601413488388, "rewards/margins": 0.120635986328125, "rewards/rejected": -0.19077149033546448, "step": 12050 }, { "epoch": 0.9158913992785267, "grad_norm": 1.96884432942911, "learning_rate": 7.284778196108706e-07, "log_odds_chosen": 1.51904296875, "log_odds_ratio": -0.3636718690395355, "logits/chosen": -1.198828101158142, "logits/rejected": -1.0314452648162842, "logps/chosen": -0.7015625238418579, "logps/rejected": -1.810156226158142, "loss": 0.8433, "nll_loss": 0.8028320074081421, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.07023926079273224, "rewards/margins": 0.11062011867761612, "rewards/rejected": -0.1807861328125, "step": 12060 }, { "epoch": 0.9166508448832352, "grad_norm": 1.9992799252917912, "learning_rate": 7.281759849927299e-07, "log_odds_chosen": 1.514746069908142, "log_odds_ratio": -0.38618165254592896, "logits/chosen": -1.406835913658142, "logits/rejected": -1.1359374523162842, "logps/chosen": -0.6709960699081421, "logps/rejected": -1.745507836341858, "loss": 0.7899, "nll_loss": 0.7208007574081421, "rewards/accuracies": 0.84375, "rewards/chosen": -0.06712646782398224, "rewards/margins": 0.10732116550207138, "rewards/rejected": -0.17448730766773224, "step": 12070 }, { "epoch": 0.9174102904879438, "grad_norm": 2.225221299369134, "learning_rate": 7.278745252468389e-07, "log_odds_chosen": 1.822265625, "log_odds_ratio": -0.36865234375, "logits/chosen": -1.357031226158142, "logits/rejected": -1.1511719226837158, "logps/chosen": -0.6727539300918579, "logps/rejected": -2.0015625953674316, "loss": 0.7941, "nll_loss": 0.78125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06723632663488388, "rewards/margins": 0.13283690810203552, "rewards/rejected": -0.20024414360523224, "step": 12080 }, { "epoch": 0.9181697360926524, "grad_norm": 1.852443463989819, "learning_rate": 7.275734395978672e-07, "log_odds_chosen": 1.6164062023162842, "log_odds_ratio": -0.42070311307907104, "logits/chosen": -1.1960937976837158, "logits/rejected": -1.102148413658142, "logps/chosen": -0.7083984613418579, "logps/rejected": -1.86328125, "loss": 0.819, "nll_loss": 0.7538086175918579, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07088623195886612, "rewards/margins": 0.11577148735523224, "rewards/rejected": -0.1865234375, "step": 12090 }, { "epoch": 0.9189291816973609, "grad_norm": 2.138987670701264, "learning_rate": 7.272727272727272e-07, "log_odds_chosen": 1.6100585460662842, "log_odds_ratio": -0.3855957090854645, "logits/chosen": -1.357421875, "logits/rejected": -1.1476562023162842, "logps/chosen": -0.704882800579071, "logps/rejected": -1.8914062976837158, "loss": 0.8038, "nll_loss": 0.7802734375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07054443657398224, "rewards/margins": 0.1187744140625, "rewards/rejected": -0.189208984375, "step": 12100 }, { "epoch": 0.9196886273020695, "grad_norm": 1.9574050073729268, "learning_rate": 7.269723875005668e-07, "log_odds_chosen": 1.588476538658142, "log_odds_ratio": -0.3549560606479645, "logits/chosen": -1.2999999523162842, "logits/rejected": -1.125585913658142, "logps/chosen": -0.6727539300918579, "logps/rejected": -1.7761719226837158, "loss": 0.7955, "nll_loss": 0.6722656488418579, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06722412258386612, "rewards/margins": 0.11033935844898224, "rewards/rejected": -0.1776123046875, "step": 12110 }, { "epoch": 0.9204480729067781, "grad_norm": 2.2465457937351516, "learning_rate": 7.266724195127595e-07, "log_odds_chosen": 1.6389648914337158, "log_odds_ratio": -0.3807128965854645, "logits/chosen": -1.1804687976837158, "logits/rejected": -1.0281250476837158, "logps/chosen": -0.681347668170929, "logps/rejected": -1.819726586341858, "loss": 0.8179, "nll_loss": 0.7953125238418579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06810303032398224, "rewards/margins": 0.11383666843175888, "rewards/rejected": -0.181884765625, "step": 12120 }, { "epoch": 0.9212075185114866, "grad_norm": 2.7089001068700114, "learning_rate": 7.26372822542898e-07, "log_odds_chosen": 1.8759765625, "log_odds_ratio": -0.3529296815395355, "logits/chosen": -1.2537109851837158, "logits/rejected": -1.117773413658142, "logps/chosen": -0.7476562261581421, "logps/rejected": -2.194531202316284, "loss": 0.8222, "nll_loss": 0.7671874761581421, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07469482719898224, "rewards/margins": 0.14455565810203552, "rewards/rejected": -0.21943359076976776, "step": 12130 }, { "epoch": 0.9219669641161952, "grad_norm": 1.9005353757299221, "learning_rate": 7.260735958267845e-07, "log_odds_chosen": 1.816992163658142, "log_odds_ratio": -0.3525146543979645, "logits/chosen": -1.2802734375, "logits/rejected": -1.106835961341858, "logps/chosen": -0.6644531488418579, "logps/rejected": -2.01171875, "loss": 0.7788, "nll_loss": 0.7333984375, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06640625, "rewards/margins": 0.13483886420726776, "rewards/rejected": -0.20126953721046448, "step": 12140 }, { "epoch": 0.9227264097209037, "grad_norm": 2.1631353261976627, "learning_rate": 7.257747386024231e-07, "log_odds_chosen": 1.9207031726837158, "log_odds_ratio": -0.34785157442092896, "logits/chosen": -1.3660156726837158, "logits/rejected": -1.1375000476837158, "logps/chosen": -0.6958984136581421, "logps/rejected": -2.094921827316284, "loss": 0.7929, "nll_loss": 0.7230468988418579, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06962890923023224, "rewards/margins": 0.13999024033546448, "rewards/rejected": -0.20966796576976776, "step": 12150 }, { "epoch": 0.9234858553256123, "grad_norm": 2.4415501109018343, "learning_rate": 7.254762501100117e-07, "log_odds_chosen": 1.7087891101837158, "log_odds_ratio": -0.36906737089157104, "logits/chosen": -1.330468773841858, "logits/rejected": -1.099609375, "logps/chosen": -0.71044921875, "logps/rejected": -1.951171875, "loss": 0.807, "nll_loss": 0.800000011920929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07098388671875, "rewards/margins": 0.12403564155101776, "rewards/rejected": -0.195068359375, "step": 12160 }, { "epoch": 0.9242453009303209, "grad_norm": 2.3780476739412597, "learning_rate": 7.251781295919335e-07, "log_odds_chosen": 1.5264160633087158, "log_odds_ratio": -0.4143310487270355, "logits/chosen": -1.33203125, "logits/rejected": -1.1593749523162842, "logps/chosen": -0.7015625238418579, "logps/rejected": -1.828710913658142, "loss": 0.8111, "nll_loss": 0.754589855670929, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07017822563648224, "rewards/margins": 0.11274109035730362, "rewards/rejected": -0.18300780653953552, "step": 12170 }, { "epoch": 0.9250047465350294, "grad_norm": 3.2619803719070846, "learning_rate": 7.248803762927498e-07, "log_odds_chosen": 1.6291015148162842, "log_odds_ratio": -0.40351563692092896, "logits/chosen": -1.3898437023162842, "logits/rejected": -1.26171875, "logps/chosen": -0.681347668170929, "logps/rejected": -1.8718750476837158, "loss": 0.79, "nll_loss": 0.734375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06816406548023224, "rewards/margins": 0.11903075873851776, "rewards/rejected": -0.18710938096046448, "step": 12180 }, { "epoch": 0.925764192139738, "grad_norm": 2.329065500071617, "learning_rate": 7.245829894591907e-07, "log_odds_chosen": 1.650292992591858, "log_odds_ratio": -0.3816894590854645, "logits/chosen": -1.352929711341858, "logits/rejected": -1.154882788658142, "logps/chosen": -0.6748046875, "logps/rejected": -1.8830077648162842, "loss": 0.7913, "nll_loss": 0.748828113079071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06744384765625, "rewards/margins": 0.12067870795726776, "rewards/rejected": -0.188232421875, "step": 12190 }, { "epoch": 0.9265236377444466, "grad_norm": 2.8180016871545703, "learning_rate": 7.242859683401482e-07, "log_odds_chosen": 1.3599121570587158, "log_odds_ratio": -0.4317871034145355, "logits/chosen": -1.2236328125, "logits/rejected": -1.069921851158142, "logps/chosen": -0.7119140625, "logps/rejected": -1.6619141101837158, "loss": 0.7851, "nll_loss": 0.7744140625, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07122802734375, "rewards/margins": 0.09494476020336151, "rewards/rejected": -0.1661376953125, "step": 12200 }, { "epoch": 0.9272830833491551, "grad_norm": 2.4115197771463, "learning_rate": 7.239893121866677e-07, "log_odds_chosen": 1.6669921875, "log_odds_ratio": -0.4050048887729645, "logits/chosen": -1.3078124523162842, "logits/rejected": -1.115234375, "logps/chosen": -0.697460949420929, "logps/rejected": -1.900781273841858, "loss": 0.8158, "nll_loss": 0.8037109375, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06979980319738388, "rewards/margins": 0.12027587741613388, "rewards/rejected": -0.18984374403953552, "step": 12210 }, { "epoch": 0.9280425289538636, "grad_norm": 1.6322034500747074, "learning_rate": 7.236930202519399e-07, "log_odds_chosen": 1.4137694835662842, "log_odds_ratio": -0.4071289002895355, "logits/chosen": -1.3298828601837158, "logits/rejected": -1.1759765148162842, "logps/chosen": -0.7183593511581421, "logps/rejected": -1.708593726158142, "loss": 0.8091, "nll_loss": 0.769335925579071, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07182617485523224, "rewards/margins": 0.09918518364429474, "rewards/rejected": -0.1708984375, "step": 12220 }, { "epoch": 0.9288019745585723, "grad_norm": 2.04050223120514, "learning_rate": 7.233970917912936e-07, "log_odds_chosen": 1.7644531726837158, "log_odds_ratio": -0.3428710997104645, "logits/chosen": -1.259374976158142, "logits/rejected": -1.1046874523162842, "logps/chosen": -0.667773425579071, "logps/rejected": -1.942968726158142, "loss": 0.7972, "nll_loss": 0.772167980670929, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.06672362983226776, "rewards/margins": 0.12753906846046448, "rewards/rejected": -0.19428710639476776, "step": 12230 }, { "epoch": 0.9295614201632808, "grad_norm": 2.1669863922797545, "learning_rate": 7.231015260621871e-07, "log_odds_chosen": 1.5539062023162842, "log_odds_ratio": -0.39970701932907104, "logits/chosen": -1.272851586341858, "logits/rejected": -1.0251953601837158, "logps/chosen": -0.68212890625, "logps/rejected": -1.819921851158142, "loss": 0.8024, "nll_loss": 0.73583984375, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06829833984375, "rewards/margins": 0.1136474609375, "rewards/rejected": -0.1820068359375, "step": 12240 }, { "epoch": 0.9303208657679893, "grad_norm": 2.2886589705647893, "learning_rate": 7.228063223242011e-07, "log_odds_chosen": 1.759863257408142, "log_odds_ratio": -0.3848632872104645, "logits/chosen": -1.2912108898162842, "logits/rejected": -1.170312523841858, "logps/chosen": -0.661914050579071, "logps/rejected": -1.943359375, "loss": 0.7742, "nll_loss": 0.693652331829071, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06614990532398224, "rewards/margins": 0.12819214165210724, "rewards/rejected": -0.19416503608226776, "step": 12250 }, { "epoch": 0.931080311372698, "grad_norm": 2.3327373958423863, "learning_rate": 7.225114798390295e-07, "log_odds_chosen": 1.7609374523162842, "log_odds_ratio": -0.34794920682907104, "logits/chosen": -1.3884766101837158, "logits/rejected": -1.1984374523162842, "logps/chosen": -0.6522461175918579, "logps/rejected": -1.932031273841858, "loss": 0.814, "nll_loss": 0.70556640625, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06522216647863388, "rewards/margins": 0.12790527939796448, "rewards/rejected": -0.19313964247703552, "step": 12260 }, { "epoch": 0.9318397569774065, "grad_norm": 2.17139757349356, "learning_rate": 7.222169978704737e-07, "log_odds_chosen": 1.435156226158142, "log_odds_ratio": -0.43022459745407104, "logits/chosen": -1.308984398841858, "logits/rejected": -1.1521484851837158, "logps/chosen": -0.672656238079071, "logps/rejected": -1.697265625, "loss": 0.7927, "nll_loss": 0.77294921875, "rewards/accuracies": 0.75, "rewards/chosen": -0.06733398139476776, "rewards/margins": 0.1023101806640625, "rewards/rejected": -0.16962890326976776, "step": 12270 }, { "epoch": 0.932599202582115, "grad_norm": 2.154223162315315, "learning_rate": 7.219228756844335e-07, "log_odds_chosen": 1.6384766101837158, "log_odds_ratio": -0.3525390625, "logits/chosen": -1.2941405773162842, "logits/rejected": -1.162695288658142, "logps/chosen": -0.644726574420929, "logps/rejected": -1.811132788658142, "loss": 0.8062, "nll_loss": 0.7625976800918579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06450195610523224, "rewards/margins": 0.11670532077550888, "rewards/rejected": -0.18115234375, "step": 12280 }, { "epoch": 0.9333586481868236, "grad_norm": 2.238004917788174, "learning_rate": 7.216291125488994e-07, "log_odds_chosen": 1.57666015625, "log_odds_ratio": -0.4132324159145355, "logits/chosen": -1.338476538658142, "logits/rejected": -1.1376953125, "logps/chosen": -0.7496093511581421, "logps/rejected": -1.9328124523162842, "loss": 0.7899, "nll_loss": 0.7611328363418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07503662258386612, "rewards/margins": 0.11835937201976776, "rewards/rejected": -0.19333496689796448, "step": 12290 }, { "epoch": 0.9341180937915322, "grad_norm": 1.689975241430637, "learning_rate": 7.213357077339458e-07, "log_odds_chosen": 2.002734422683716, "log_odds_ratio": -0.31633299589157104, "logits/chosen": -1.2404296398162842, "logits/rejected": -1.0558593273162842, "logps/chosen": -0.655566394329071, "logps/rejected": -2.09375, "loss": 0.7767, "nll_loss": 0.6932617425918579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06553955376148224, "rewards/margins": 0.14401856064796448, "rewards/rejected": -0.20957031846046448, "step": 12300 }, { "epoch": 0.9348775393962407, "grad_norm": 1.9763446202978832, "learning_rate": 7.210426605117224e-07, "log_odds_chosen": 1.6223633289337158, "log_odds_ratio": -0.37885743379592896, "logits/chosen": -1.3583984375, "logits/rejected": -1.178125023841858, "logps/chosen": -0.6591796875, "logps/rejected": -1.7976562976837158, "loss": 0.8043, "nll_loss": 0.690625011920929, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06589355319738388, "rewards/margins": 0.11367950588464737, "rewards/rejected": -0.1796875, "step": 12310 }, { "epoch": 0.9356369850009493, "grad_norm": 1.8553784790177714, "learning_rate": 7.207499701564471e-07, "log_odds_chosen": 1.8896484375, "log_odds_ratio": -0.3421630859375, "logits/chosen": -1.3048827648162842, "logits/rejected": -1.122656226158142, "logps/chosen": -0.7098633050918579, "logps/rejected": -2.0863280296325684, "loss": 0.7816, "nll_loss": 0.6957031488418579, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.07099609076976776, "rewards/margins": 0.1375732421875, "rewards/rejected": -0.208740234375, "step": 12320 }, { "epoch": 0.9363964306056579, "grad_norm": 11.77630911027518, "learning_rate": 7.204576359443989e-07, "log_odds_chosen": 1.907812476158142, "log_odds_ratio": -0.3348144590854645, "logits/chosen": -1.261328101158142, "logits/rejected": -1.0480468273162842, "logps/chosen": -0.6861327886581421, "logps/rejected": -2.083984375, "loss": 0.8306, "nll_loss": 0.827343761920929, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06862793117761612, "rewards/margins": 0.13968506455421448, "rewards/rejected": -0.20830078423023224, "step": 12330 }, { "epoch": 0.9371558762103664, "grad_norm": 1.9798240643505503, "learning_rate": 7.201656571539094e-07, "log_odds_chosen": 1.8720703125, "log_odds_ratio": -0.33837890625, "logits/chosen": -1.3576171398162842, "logits/rejected": -1.1240234375, "logps/chosen": -0.6888672113418579, "logps/rejected": -2.0357422828674316, "loss": 0.8151, "nll_loss": 0.787304699420929, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.06882324069738388, "rewards/margins": 0.13472290337085724, "rewards/rejected": -0.2034912109375, "step": 12340 }, { "epoch": 0.937915321815075, "grad_norm": 1.844166037502162, "learning_rate": 7.19874033065356e-07, "log_odds_chosen": 1.412109375, "log_odds_ratio": -0.4686035215854645, "logits/chosen": -1.2263672351837158, "logits/rejected": -1.0634765625, "logps/chosen": -0.7212890386581421, "logps/rejected": -1.7644531726837158, "loss": 0.792, "nll_loss": 0.8623046875, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.0721435546875, "rewards/margins": 0.1041259765625, "rewards/rejected": -0.17626953125, "step": 12350 }, { "epoch": 0.9386747674197835, "grad_norm": 1.8341316008210111, "learning_rate": 7.195827629611545e-07, "log_odds_chosen": 1.8445312976837158, "log_odds_ratio": -0.3741210997104645, "logits/chosen": -1.3009765148162842, "logits/rejected": -1.1298828125, "logps/chosen": -0.657519519329071, "logps/rejected": -2.0396485328674316, "loss": 0.8071, "nll_loss": 0.8041015863418579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06568603217601776, "rewards/margins": 0.13828125596046448, "rewards/rejected": -0.20395508408546448, "step": 12360 }, { "epoch": 0.9394342130244921, "grad_norm": 1.874251174993908, "learning_rate": 7.19291846125751e-07, "log_odds_chosen": 1.8349609375, "log_odds_ratio": -0.3509765565395355, "logits/chosen": -1.339453101158142, "logits/rejected": -1.1494140625, "logps/chosen": -0.682421863079071, "logps/rejected": -1.9949219226837158, "loss": 0.7864, "nll_loss": 0.7666015625, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06818847358226776, "rewards/margins": 0.13150635361671448, "rewards/rejected": -0.19970703125, "step": 12370 }, { "epoch": 0.9401936586292007, "grad_norm": 2.0402263926405944, "learning_rate": 7.190012818456154e-07, "log_odds_chosen": 1.633886694908142, "log_odds_ratio": -0.41455078125, "logits/chosen": -1.137304663658142, "logits/rejected": -1.0654296875, "logps/chosen": -0.69189453125, "logps/rejected": -1.885156273841858, "loss": 0.7893, "nll_loss": 0.7831054925918579, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06923828274011612, "rewards/margins": 0.11934814602136612, "rewards/rejected": -0.18852539360523224, "step": 12380 }, { "epoch": 0.9409531042339092, "grad_norm": 2.2993465116698917, "learning_rate": 7.187110694092334e-07, "log_odds_chosen": 1.607031226158142, "log_odds_ratio": -0.40019530057907104, "logits/chosen": -1.132421851158142, "logits/rejected": -1.054101586341858, "logps/chosen": -0.658203125, "logps/rejected": -1.794531226158142, "loss": 0.7936, "nll_loss": 0.7227538824081421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06582031399011612, "rewards/margins": 0.11345215141773224, "rewards/rejected": -0.1793212890625, "step": 12390 }, { "epoch": 0.9417125498386179, "grad_norm": 1.73944935234149, "learning_rate": 7.184212081070996e-07, "log_odds_chosen": 1.9119141101837158, "log_odds_ratio": -0.3604980409145355, "logits/chosen": -1.3654296398162842, "logits/rejected": -1.102148413658142, "logps/chosen": -0.7197265625, "logps/rejected": -2.1480469703674316, "loss": 0.8155, "nll_loss": 0.7984374761581421, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07196044921875, "rewards/margins": 0.14262695610523224, "rewards/rejected": -0.21474608778953552, "step": 12400 }, { "epoch": 0.9424719954433264, "grad_norm": 2.242990145345622, "learning_rate": 7.181316972317097e-07, "log_odds_chosen": 1.8468749523162842, "log_odds_ratio": -0.34833985567092896, "logits/chosen": -1.2472655773162842, "logits/rejected": -1.0246093273162842, "logps/chosen": -0.70849609375, "logps/rejected": -2.044140577316284, "loss": 0.7827, "nll_loss": 0.7206054925918579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07087402045726776, "rewards/margins": 0.13352051377296448, "rewards/rejected": -0.20441894233226776, "step": 12410 }, { "epoch": 0.9432314410480349, "grad_norm": 2.8631484059167196, "learning_rate": 7.17842536077554e-07, "log_odds_chosen": 1.7151367664337158, "log_odds_ratio": -0.3804687559604645, "logits/chosen": -1.1982421875, "logits/rejected": -1.015625, "logps/chosen": -0.645312488079071, "logps/rejected": -1.8722655773162842, "loss": 0.7917, "nll_loss": 0.749316394329071, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06453857570886612, "rewards/margins": 0.12273101508617401, "rewards/rejected": -0.18735352158546448, "step": 12420 }, { "epoch": 0.9439908866527434, "grad_norm": 1.940658803135396, "learning_rate": 7.175537239411094e-07, "log_odds_chosen": 1.8464844226837158, "log_odds_ratio": -0.3818115293979645, "logits/chosen": -1.2072265148162842, "logits/rejected": -1.038476586341858, "logps/chosen": -0.6845703125, "logps/rejected": -2.112499952316284, "loss": 0.7969, "nll_loss": 0.7328125238418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06849364936351776, "rewards/margins": 0.14274902641773224, "rewards/rejected": -0.211181640625, "step": 12430 }, { "epoch": 0.9447503322574521, "grad_norm": 2.133502818944503, "learning_rate": 7.172652601208325e-07, "log_odds_chosen": 1.830468773841858, "log_odds_ratio": -0.3373779356479645, "logits/chosen": -1.225000023841858, "logits/rejected": -1.017187476158142, "logps/chosen": -0.6724609136581421, "logps/rejected": -2.025585889816284, "loss": 0.7619, "nll_loss": 0.7606445550918579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.0672607421875, "rewards/margins": 0.13522949814796448, "rewards/rejected": -0.20256347954273224, "step": 12440 }, { "epoch": 0.9455097778621606, "grad_norm": 2.08285954501526, "learning_rate": 7.169771439171534e-07, "log_odds_chosen": 1.8005859851837158, "log_odds_ratio": -0.35661619901657104, "logits/chosen": -1.1804687976837158, "logits/rejected": -0.9906250238418579, "logps/chosen": -0.640625, "logps/rejected": -1.950781226158142, "loss": 0.7929, "nll_loss": 0.7015625238418579, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06401367485523224, "rewards/margins": 0.13103027641773224, "rewards/rejected": -0.19504395127296448, "step": 12450 }, { "epoch": 0.9462692234668691, "grad_norm": 2.3887893923788965, "learning_rate": 7.166893746324661e-07, "log_odds_chosen": 1.7887694835662842, "log_odds_ratio": -0.3664794862270355, "logits/chosen": -1.3173828125, "logits/rejected": -1.157812476158142, "logps/chosen": -0.6771484613418579, "logps/rejected": -1.965234398841858, "loss": 0.7757, "nll_loss": 0.728808581829071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06773681938648224, "rewards/margins": 0.12892456352710724, "rewards/rejected": -0.19660644233226776, "step": 12460 }, { "epoch": 0.9470286690715778, "grad_norm": 2.476876987530823, "learning_rate": 7.164019515711245e-07, "log_odds_chosen": 1.646582007408142, "log_odds_ratio": -0.392333984375, "logits/chosen": -1.3386719226837158, "logits/rejected": -1.1687500476837158, "logps/chosen": -0.722851574420929, "logps/rejected": -1.92578125, "loss": 0.8066, "nll_loss": 0.7681640386581421, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07232666015625, "rewards/margins": 0.12028197944164276, "rewards/rejected": -0.19257812201976776, "step": 12470 }, { "epoch": 0.9477881146762863, "grad_norm": 1.963554337132372, "learning_rate": 7.161148740394328e-07, "log_odds_chosen": 1.83203125, "log_odds_ratio": -0.36088865995407104, "logits/chosen": -1.239843726158142, "logits/rejected": -1.1248047351837158, "logps/chosen": -0.71142578125, "logps/rejected": -2.065234422683716, "loss": 0.7838, "nll_loss": 0.756542980670929, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.07115478813648224, "rewards/margins": 0.135498046875, "rewards/rejected": -0.20659179985523224, "step": 12480 }, { "epoch": 0.9485475602809949, "grad_norm": 2.436780818511159, "learning_rate": 7.158281413456402e-07, "log_odds_chosen": 1.6173827648162842, "log_odds_ratio": -0.38623046875, "logits/chosen": -1.334375023841858, "logits/rejected": -1.1064453125, "logps/chosen": -0.6968749761581421, "logps/rejected": -1.8583984375, "loss": 0.7917, "nll_loss": 0.719042956829071, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06960449367761612, "rewards/margins": 0.11619873344898224, "rewards/rejected": -0.18596191704273224, "step": 12490 }, { "epoch": 0.9493070058857034, "grad_norm": 2.3053765148812593, "learning_rate": 7.155417527999326e-07, "log_odds_chosen": 1.7921874523162842, "log_odds_ratio": -0.3750976622104645, "logits/chosen": -1.2443358898162842, "logits/rejected": -1.067968726158142, "logps/chosen": -0.686328113079071, "logps/rejected": -1.999609351158142, "loss": 0.791, "nll_loss": 0.7666015625, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06859131157398224, "rewards/margins": 0.13139037787914276, "rewards/rejected": -0.20002441108226776, "step": 12500 }, { "epoch": 0.950066451490412, "grad_norm": 2.6499449205144874, "learning_rate": 7.152557077144268e-07, "log_odds_chosen": 1.7830078601837158, "log_odds_ratio": -0.37666016817092896, "logits/chosen": -1.233789086341858, "logits/rejected": -1.096093773841858, "logps/chosen": -0.6600586175918579, "logps/rejected": -1.973046898841858, "loss": 0.7591, "nll_loss": 0.7132812738418579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06600341945886612, "rewards/margins": 0.13112792372703552, "rewards/rejected": -0.1971435546875, "step": 12510 }, { "epoch": 0.9508258970951206, "grad_norm": 2.726075293263638, "learning_rate": 7.149700054031623e-07, "log_odds_chosen": 1.684472680091858, "log_odds_ratio": -0.37272948026657104, "logits/chosen": -1.250390648841858, "logits/rejected": -1.1115233898162842, "logps/chosen": -0.648242175579071, "logps/rejected": -1.8527343273162842, "loss": 0.782, "nll_loss": 0.76025390625, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06479492038488388, "rewards/margins": 0.12047119438648224, "rewards/rejected": -0.18532714247703552, "step": 12520 }, { "epoch": 0.9515853426998291, "grad_norm": 2.368715467535154, "learning_rate": 7.146846451820958e-07, "log_odds_chosen": 1.4275391101837158, "log_odds_ratio": -0.38676756620407104, "logits/chosen": -1.3162109851837158, "logits/rejected": -1.132421851158142, "logps/chosen": -0.6585937738418579, "logps/rejected": -1.680273413658142, "loss": 0.8197, "nll_loss": 0.7129882574081421, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.06586913764476776, "rewards/margins": 0.10205078125, "rewards/rejected": -0.16799315810203552, "step": 12530 }, { "epoch": 0.9523447883045377, "grad_norm": 2.261225369741669, "learning_rate": 7.14399626369093e-07, "log_odds_chosen": 1.8825194835662842, "log_odds_ratio": -0.3388915956020355, "logits/chosen": -1.2263672351837158, "logits/rejected": -1.052148461341858, "logps/chosen": -0.675585925579071, "logps/rejected": -2.0628905296325684, "loss": 0.7862, "nll_loss": 0.7435547113418579, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06755371391773224, "rewards/margins": 0.13873901963233948, "rewards/rejected": -0.2064208984375, "step": 12540 }, { "epoch": 0.9531042339092463, "grad_norm": 2.1483388641673553, "learning_rate": 7.141149482839228e-07, "log_odds_chosen": 1.373632788658142, "log_odds_ratio": -0.42939454317092896, "logits/chosen": -1.287109375, "logits/rejected": -1.0662109851837158, "logps/chosen": -0.755664050579071, "logps/rejected": -1.765234351158142, "loss": 0.8146, "nll_loss": 0.862109363079071, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07547607272863388, "rewards/margins": 0.10101318359375, "rewards/rejected": -0.17668457329273224, "step": 12550 }, { "epoch": 0.9538636795139548, "grad_norm": 2.1703818739610123, "learning_rate": 7.138306102482496e-07, "log_odds_chosen": 1.7565429210662842, "log_odds_ratio": -0.3507324159145355, "logits/chosen": -1.369531273841858, "logits/rejected": -1.1710937023162842, "logps/chosen": -0.629589855670929, "logps/rejected": -1.8798828125, "loss": 0.8009, "nll_loss": 0.721875011920929, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06301269680261612, "rewards/margins": 0.12495269626379013, "rewards/rejected": -0.18793945014476776, "step": 12560 }, { "epoch": 0.9546231251186634, "grad_norm": 1.8790398458236333, "learning_rate": 7.135466115856274e-07, "log_odds_chosen": 1.8371093273162842, "log_odds_ratio": -0.3545898497104645, "logits/chosen": -1.3681640625, "logits/rejected": -1.1632812023162842, "logps/chosen": -0.706835925579071, "logps/rejected": -2.091796875, "loss": 0.8002, "nll_loss": 0.752246081829071, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0706787109375, "rewards/margins": 0.13850097358226776, "rewards/rejected": -0.20908203721046448, "step": 12570 }, { "epoch": 0.955382570723372, "grad_norm": 2.176703629449019, "learning_rate": 7.13262951621492e-07, "log_odds_chosen": 1.881445288658142, "log_odds_ratio": -0.36748045682907104, "logits/chosen": -1.239648461341858, "logits/rejected": -1.004296898841858, "logps/chosen": -0.732617199420929, "logps/rejected": -2.078125, "loss": 0.7762, "nll_loss": 0.814453125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07330322265625, "rewards/margins": 0.13447265326976776, "rewards/rejected": -0.20791015028953552, "step": 12580 }, { "epoch": 0.9561420163280805, "grad_norm": 1.8387210592456034, "learning_rate": 7.129796296831554e-07, "log_odds_chosen": 1.63037109375, "log_odds_ratio": -0.3746093809604645, "logits/chosen": -1.234375, "logits/rejected": -1.104882836341858, "logps/chosen": -0.637402355670929, "logps/rejected": -1.7820312976837158, "loss": 0.7779, "nll_loss": 0.708300769329071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06378173828125, "rewards/margins": 0.11461181938648224, "rewards/rejected": -0.17844238877296448, "step": 12590 }, { "epoch": 0.956901461932789, "grad_norm": 2.110513731367032, "learning_rate": 7.126966450997984e-07, "log_odds_chosen": 1.600195288658142, "log_odds_ratio": -0.37578123807907104, "logits/chosen": -1.242773413658142, "logits/rejected": -1.0421874523162842, "logps/chosen": -0.732226550579071, "logps/rejected": -1.871484398841858, "loss": 0.7846, "nll_loss": 0.7890625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07320556789636612, "rewards/margins": 0.11375732719898224, "rewards/rejected": -0.18701171875, "step": 12600 }, { "epoch": 0.9576609075374977, "grad_norm": 1.9931633019157153, "learning_rate": 7.124139972024637e-07, "log_odds_chosen": 1.6046874523162842, "log_odds_ratio": -0.401611328125, "logits/chosen": -1.2648437023162842, "logits/rejected": -1.098046898841858, "logps/chosen": -0.650390625, "logps/rejected": -1.751953125, "loss": 0.7959, "nll_loss": 0.7640625238418579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06505127251148224, "rewards/margins": 0.11008300632238388, "rewards/rejected": -0.17507323622703552, "step": 12610 }, { "epoch": 0.9584203531422062, "grad_norm": 2.066771790604578, "learning_rate": 7.121316853240503e-07, "log_odds_chosen": 1.501367211341858, "log_odds_ratio": -0.4216064512729645, "logits/chosen": -1.2595703601837158, "logits/rejected": -1.125, "logps/chosen": -0.7071288824081421, "logps/rejected": -1.7912108898162842, "loss": 0.7847, "nll_loss": 0.7730468511581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07071533054113388, "rewards/margins": 0.10836181789636612, "rewards/rejected": -0.17900390923023224, "step": 12620 }, { "epoch": 0.9591797987469147, "grad_norm": 2.2624913483446836, "learning_rate": 7.118497087993057e-07, "log_odds_chosen": 1.459814429283142, "log_odds_ratio": -0.4432128965854645, "logits/chosen": -1.225195288658142, "logits/rejected": -1.054101586341858, "logps/chosen": -0.723437488079071, "logps/rejected": -1.8054687976837158, "loss": 0.8268, "nll_loss": 0.9164062738418579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07237549126148224, "rewards/margins": 0.1081695556640625, "rewards/rejected": -0.18044432997703552, "step": 12630 }, { "epoch": 0.9599392443516234, "grad_norm": 1.7938062256640919, "learning_rate": 7.1156806696482e-07, "log_odds_chosen": 1.557226538658142, "log_odds_ratio": -0.3678222596645355, "logits/chosen": -1.3488280773162842, "logits/rejected": -1.10546875, "logps/chosen": -0.677929699420929, "logps/rejected": -1.7705078125, "loss": 0.7912, "nll_loss": 0.765625, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06773681938648224, "rewards/margins": 0.10938110202550888, "rewards/rejected": -0.177001953125, "step": 12640 }, { "epoch": 0.9606986899563319, "grad_norm": 1.9984425130899108, "learning_rate": 7.112867591590192e-07, "log_odds_chosen": 1.8330078125, "log_odds_ratio": -0.36181640625, "logits/chosen": -1.3458983898162842, "logits/rejected": -1.196679711341858, "logps/chosen": -0.6670898199081421, "logps/rejected": -1.983984351158142, "loss": 0.7939, "nll_loss": 0.694531261920929, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06667480617761612, "rewards/margins": 0.13165283203125, "rewards/rejected": -0.19833984971046448, "step": 12650 }, { "epoch": 0.9614581355610404, "grad_norm": 1.9340662137211229, "learning_rate": 7.110057847221588e-07, "log_odds_chosen": 1.6970703601837158, "log_odds_ratio": -0.35637205839157104, "logits/chosen": -1.278710961341858, "logits/rejected": -1.117773413658142, "logps/chosen": -0.706835925579071, "logps/rejected": -1.9542968273162842, "loss": 0.8063, "nll_loss": 0.717578113079071, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.0706787109375, "rewards/margins": 0.12470702826976776, "rewards/rejected": -0.19536133110523224, "step": 12660 }, { "epoch": 0.962217581165749, "grad_norm": 2.006039847334674, "learning_rate": 7.107251429963166e-07, "log_odds_chosen": 1.949853539466858, "log_odds_ratio": -0.35063475370407104, "logits/chosen": -1.186914086341858, "logits/rejected": -1.037500023841858, "logps/chosen": -0.677539050579071, "logps/rejected": -2.168164014816284, "loss": 0.791, "nll_loss": 0.737109363079071, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06782226264476776, "rewards/margins": 0.14898070693016052, "rewards/rejected": -0.216796875, "step": 12670 }, { "epoch": 0.9629770267704576, "grad_norm": 1.9010466724392234, "learning_rate": 7.104448333253878e-07, "log_odds_chosen": 1.588476538658142, "log_odds_ratio": -0.3582519590854645, "logits/chosen": -1.33203125, "logits/rejected": -1.1091797351837158, "logps/chosen": -0.7093750238418579, "logps/rejected": -1.8367187976837158, "loss": 0.776, "nll_loss": 0.7662109136581421, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07094726711511612, "rewards/margins": 0.11270751804113388, "rewards/rejected": -0.1837158203125, "step": 12680 }, { "epoch": 0.9637364723751661, "grad_norm": 2.058463999298868, "learning_rate": 7.101648550550766e-07, "log_odds_chosen": 1.914819359779358, "log_odds_ratio": -0.3724121153354645, "logits/chosen": -1.252343773841858, "logits/rejected": -1.0185546875, "logps/chosen": -0.6717773675918579, "logps/rejected": -2.0601563453674316, "loss": 0.7913, "nll_loss": 0.72509765625, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06723632663488388, "rewards/margins": 0.13879700005054474, "rewards/rejected": -0.20607909560203552, "step": 12690 }, { "epoch": 0.9644959179798747, "grad_norm": 2.3204129403074876, "learning_rate": 7.098852075328911e-07, "log_odds_chosen": 1.7404296398162842, "log_odds_ratio": -0.33442384004592896, "logits/chosen": -1.3291015625, "logits/rejected": -1.1570312976837158, "logps/chosen": -0.6373046636581421, "logps/rejected": -1.8718750476837158, "loss": 0.7929, "nll_loss": 0.6869140863418579, "rewards/accuracies": 0.875, "rewards/chosen": -0.06362304836511612, "rewards/margins": 0.12370605766773224, "rewards/rejected": -0.18735352158546448, "step": 12700 }, { "epoch": 0.9652553635845833, "grad_norm": 2.207475798686682, "learning_rate": 7.096058901081364e-07, "log_odds_chosen": 1.685449242591858, "log_odds_ratio": -0.39765626192092896, "logits/chosen": -1.222070336341858, "logits/rejected": -1.065820336341858, "logps/chosen": -0.7730468511581421, "logps/rejected": -2.005859375, "loss": 0.7669, "nll_loss": 0.7574218511581421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07724609225988388, "rewards/margins": 0.12337646633386612, "rewards/rejected": -0.20048828423023224, "step": 12710 }, { "epoch": 0.9660148091892918, "grad_norm": 2.0663226849446286, "learning_rate": 7.093269021319087e-07, "log_odds_chosen": 1.7585937976837158, "log_odds_ratio": -0.4031738340854645, "logits/chosen": -1.3087890148162842, "logits/rejected": -1.162695288658142, "logps/chosen": -0.7206054925918579, "logps/rejected": -2.030078172683716, "loss": 0.7933, "nll_loss": 0.7828124761581421, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07200928032398224, "rewards/margins": 0.13090820610523224, "rewards/rejected": -0.20292969048023224, "step": 12720 }, { "epoch": 0.9667742547940004, "grad_norm": 1.7719573950682022, "learning_rate": 7.090482429570884e-07, "log_odds_chosen": 1.458398461341858, "log_odds_ratio": -0.4191528260707855, "logits/chosen": -1.3835937976837158, "logits/rejected": -1.1027343273162842, "logps/chosen": -0.686328113079071, "logps/rejected": -1.720117211341858, "loss": 0.7659, "nll_loss": 0.6783202886581421, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.06865234673023224, "rewards/margins": 0.10337524116039276, "rewards/rejected": -0.17209473252296448, "step": 12730 }, { "epoch": 0.9675337003987089, "grad_norm": 2.074916555090248, "learning_rate": 7.087699119383339e-07, "log_odds_chosen": 1.451562523841858, "log_odds_ratio": -0.4014648497104645, "logits/chosen": -1.291406273841858, "logits/rejected": -1.1064453125, "logps/chosen": -0.712695300579071, "logps/rejected": -1.7585937976837158, "loss": 0.7902, "nll_loss": 0.7095702886581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07135009765625, "rewards/margins": 0.10441894829273224, "rewards/rejected": -0.1759033203125, "step": 12740 }, { "epoch": 0.9682931460034175, "grad_norm": 2.5396425460600653, "learning_rate": 7.084919084320762e-07, "log_odds_chosen": 1.6124999523162842, "log_odds_ratio": -0.34916990995407104, "logits/chosen": -1.2208983898162842, "logits/rejected": -1.04296875, "logps/chosen": -0.6484375, "logps/rejected": -1.75390625, "loss": 0.8067, "nll_loss": 0.7904297113418579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06479492038488388, "rewards/margins": 0.11060790717601776, "rewards/rejected": -0.17526856064796448, "step": 12750 }, { "epoch": 0.9690525916081261, "grad_norm": 1.8898647906353983, "learning_rate": 7.08214231796511e-07, "log_odds_chosen": 1.810937523841858, "log_odds_ratio": -0.378173828125, "logits/chosen": -1.3332030773162842, "logits/rejected": -1.19140625, "logps/chosen": -0.7500976324081421, "logps/rejected": -2.1019530296325684, "loss": 0.8074, "nll_loss": 0.765332043170929, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07498779147863388, "rewards/margins": 0.13524170219898224, "rewards/rejected": -0.21000976860523224, "step": 12760 }, { "epoch": 0.9698120372128346, "grad_norm": 2.4705840314072063, "learning_rate": 7.079368813915939e-07, "log_odds_chosen": 1.5283203125, "log_odds_ratio": -0.40814208984375, "logits/chosen": -1.27734375, "logits/rejected": -1.1238281726837158, "logps/chosen": -0.7158203125, "logps/rejected": -1.812109351158142, "loss": 0.808, "nll_loss": 0.766796886920929, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07154540717601776, "rewards/margins": 0.109649658203125, "rewards/rejected": -0.18125000596046448, "step": 12770 }, { "epoch": 0.9705714828175432, "grad_norm": 1.8877925851695068, "learning_rate": 7.076598565790337e-07, "log_odds_chosen": 1.5548827648162842, "log_odds_ratio": -0.40947264432907104, "logits/chosen": -1.293359398841858, "logits/rejected": -1.1154296398162842, "logps/chosen": -0.6703125238418579, "logps/rejected": -1.791015625, "loss": 0.7894, "nll_loss": 0.7552734613418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06696777045726776, "rewards/margins": 0.11204834282398224, "rewards/rejected": -0.17915038764476776, "step": 12780 }, { "epoch": 0.9713309284222518, "grad_norm": 2.394460997649757, "learning_rate": 7.073831567222859e-07, "log_odds_chosen": 1.959375023841858, "log_odds_ratio": -0.3211425840854645, "logits/chosen": -1.292578101158142, "logits/rejected": -1.0275390148162842, "logps/chosen": -0.679882824420929, "logps/rejected": -2.146484375, "loss": 0.7885, "nll_loss": 0.685742199420929, "rewards/accuracies": 0.875, "rewards/chosen": -0.06794433295726776, "rewards/margins": 0.14670410752296448, "rewards/rejected": -0.21467284858226776, "step": 12790 }, { "epoch": 0.9720903740269603, "grad_norm": 2.2007445484107273, "learning_rate": 7.071067811865475e-07, "log_odds_chosen": 1.5481445789337158, "log_odds_ratio": -0.37504881620407104, "logits/chosen": -1.3966796398162842, "logits/rejected": -1.2199218273162842, "logps/chosen": -0.6714843511581421, "logps/rejected": -1.7353515625, "loss": 0.7675, "nll_loss": 0.6893554925918579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06708984076976776, "rewards/margins": 0.10643310844898224, "rewards/rejected": -0.17363281548023224, "step": 12800 }, { "epoch": 0.9728498196316688, "grad_norm": 1.9328356578563417, "learning_rate": 7.068307293387497e-07, "log_odds_chosen": 1.725000023841858, "log_odds_ratio": -0.3557372987270355, "logits/chosen": -1.360937476158142, "logits/rejected": -1.083398461341858, "logps/chosen": -0.729296863079071, "logps/rejected": -1.9794921875, "loss": 0.8088, "nll_loss": 0.764355480670929, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07292480766773224, "rewards/margins": 0.12503662705421448, "rewards/rejected": -0.19809570908546448, "step": 12810 }, { "epoch": 0.9736092652363775, "grad_norm": 1.896680223242996, "learning_rate": 7.065550005475526e-07, "log_odds_chosen": 1.2903320789337158, "log_odds_ratio": -0.45849609375, "logits/chosen": -1.287500023841858, "logits/rejected": -1.115234375, "logps/chosen": -0.677441418170929, "logps/rejected": -1.6023437976837158, "loss": 0.8021, "nll_loss": 0.7271484136581421, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.06776122748851776, "rewards/margins": 0.09246215969324112, "rewards/rejected": -0.16022948920726776, "step": 12820 }, { "epoch": 0.974368710841086, "grad_norm": 2.549725763766807, "learning_rate": 7.062795941833388e-07, "log_odds_chosen": 1.8722655773162842, "log_odds_ratio": -0.35944825410842896, "logits/chosen": -1.338281273841858, "logits/rejected": -1.1277344226837158, "logps/chosen": -0.7408202886581421, "logps/rejected": -2.1126952171325684, "loss": 0.7839, "nll_loss": 0.716503918170929, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.07406006008386612, "rewards/margins": 0.13737793266773224, "rewards/rejected": -0.21140137314796448, "step": 12830 }, { "epoch": 0.9751281564457945, "grad_norm": 2.0388026815112714, "learning_rate": 7.060045096182077e-07, "log_odds_chosen": 1.7365233898162842, "log_odds_ratio": -0.3511962890625, "logits/chosen": -1.229882836341858, "logits/rejected": -1.0310547351837158, "logps/chosen": -0.683789074420929, "logps/rejected": -1.9509766101837158, "loss": 0.7512, "nll_loss": 0.663378894329071, "rewards/accuracies": 0.8125, "rewards/chosen": -0.068359375, "rewards/margins": 0.12687988579273224, "rewards/rejected": -0.19511719048023224, "step": 12840 }, { "epoch": 0.9758876020505032, "grad_norm": 2.2659267895291686, "learning_rate": 7.057297462259693e-07, "log_odds_chosen": 1.433691382408142, "log_odds_ratio": -0.381591796875, "logits/chosen": -1.350000023841858, "logits/rejected": -1.1248047351837158, "logps/chosen": -0.7164062261581421, "logps/rejected": -1.7224609851837158, "loss": 0.7708, "nll_loss": 0.749804675579071, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07164306938648224, "rewards/margins": 0.100494384765625, "rewards/rejected": -0.172119140625, "step": 12850 }, { "epoch": 0.9766470476552117, "grad_norm": 3.038838052620563, "learning_rate": 7.05455303382138e-07, "log_odds_chosen": 1.697265625, "log_odds_ratio": -0.35209959745407104, "logits/chosen": -1.3205077648162842, "logits/rejected": -1.1101562976837158, "logps/chosen": -0.6700195074081421, "logps/rejected": -1.8503906726837158, "loss": 0.7712, "nll_loss": 0.7027343511581421, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06695556640625, "rewards/margins": 0.11800537258386612, "rewards/rejected": -0.18503418564796448, "step": 12860 }, { "epoch": 0.9774064932599202, "grad_norm": 2.1056815478848705, "learning_rate": 7.051811804639268e-07, "log_odds_chosen": 1.601171851158142, "log_odds_ratio": -0.3719238340854645, "logits/chosen": -1.366601586341858, "logits/rejected": -1.079492211341858, "logps/chosen": -0.689648449420929, "logps/rejected": -1.8623046875, "loss": 0.7986, "nll_loss": 0.762890636920929, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06894531100988388, "rewards/margins": 0.11726073920726776, "rewards/rejected": -0.18637695908546448, "step": 12870 }, { "epoch": 0.9781659388646288, "grad_norm": 2.1060876953582053, "learning_rate": 7.049073768502414e-07, "log_odds_chosen": 1.449804663658142, "log_odds_ratio": -0.3846679627895355, "logits/chosen": -1.310937523841858, "logits/rejected": -1.1013672351837158, "logps/chosen": -0.6626952886581421, "logps/rejected": -1.6531250476837158, "loss": 0.8015, "nll_loss": 0.6644531488418579, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06614990532398224, "rewards/margins": 0.09923096001148224, "rewards/rejected": -0.16545410454273224, "step": 12880 }, { "epoch": 0.9789253844693374, "grad_norm": 2.0329442497296215, "learning_rate": 7.046338919216742e-07, "log_odds_chosen": 1.5890624523162842, "log_odds_ratio": -0.3854003846645355, "logits/chosen": -1.293554663658142, "logits/rejected": -1.094140648841858, "logps/chosen": -0.7030273675918579, "logps/rejected": -1.835546851158142, "loss": 0.7963, "nll_loss": 0.759765625, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07026366889476776, "rewards/margins": 0.11328125, "rewards/rejected": -0.18344727158546448, "step": 12890 }, { "epoch": 0.9796848300740459, "grad_norm": 2.4943831968111874, "learning_rate": 7.04360725060499e-07, "log_odds_chosen": 1.529296875, "log_odds_ratio": -0.39497071504592896, "logits/chosen": -1.2705078125, "logits/rejected": -1.0929687023162842, "logps/chosen": -0.6976562738418579, "logps/rejected": -1.764062523841858, "loss": 0.8002, "nll_loss": 0.6871093511581421, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06979980319738388, "rewards/margins": 0.10665283352136612, "rewards/rejected": -0.17644043266773224, "step": 12900 }, { "epoch": 0.9804442756787545, "grad_norm": 2.521769027654981, "learning_rate": 7.040878756506639e-07, "log_odds_chosen": 1.4698486328125, "log_odds_ratio": -0.406982421875, "logits/chosen": -1.362695336341858, "logits/rejected": -1.180273413658142, "logps/chosen": -0.70654296875, "logps/rejected": -1.7550780773162842, "loss": 0.8017, "nll_loss": 0.8026367425918579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07070312649011612, "rewards/margins": 0.1048583984375, "rewards/rejected": -0.175537109375, "step": 12910 }, { "epoch": 0.9812037212834631, "grad_norm": 2.22564144342108, "learning_rate": 7.038153430777867e-07, "log_odds_chosen": 1.6201171875, "log_odds_ratio": -0.3583984375, "logits/chosen": -1.2599608898162842, "logits/rejected": -1.0544922351837158, "logps/chosen": -0.7017577886581421, "logps/rejected": -1.850000023841858, "loss": 0.7889, "nll_loss": 0.74755859375, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07020263373851776, "rewards/margins": 0.11478271335363388, "rewards/rejected": -0.18496093153953552, "step": 12920 }, { "epoch": 0.9819631668881716, "grad_norm": 2.52921612688629, "learning_rate": 7.035431267291484e-07, "log_odds_chosen": 1.8654296398162842, "log_odds_ratio": -0.3659423887729645, "logits/chosen": -1.2667968273162842, "logits/rejected": -1.1044921875, "logps/chosen": -0.6767578125, "logps/rejected": -2.091015577316284, "loss": 0.7888, "nll_loss": 0.76123046875, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.06761474907398224, "rewards/margins": 0.14168700575828552, "rewards/rejected": -0.209228515625, "step": 12930 }, { "epoch": 0.9827226124928802, "grad_norm": 2.203514256785541, "learning_rate": 7.032712259936877e-07, "log_odds_chosen": 1.679296851158142, "log_odds_ratio": -0.35693359375, "logits/chosen": -1.404687523841858, "logits/rejected": -1.139257788658142, "logps/chosen": -0.6859375238418579, "logps/rejected": -1.8621094226837158, "loss": 0.7931, "nll_loss": 0.71923828125, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06855468451976776, "rewards/margins": 0.11770019680261612, "rewards/rejected": -0.18625488877296448, "step": 12940 }, { "epoch": 0.9834820580975887, "grad_norm": 2.1508035001624792, "learning_rate": 7.029996402619949e-07, "log_odds_chosen": 1.39501953125, "log_odds_ratio": -0.42021483182907104, "logits/chosen": -1.335351586341858, "logits/rejected": -1.1521484851837158, "logps/chosen": -0.6778320074081421, "logps/rejected": -1.6476562023162842, "loss": 0.7955, "nll_loss": 0.72802734375, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06782226264476776, "rewards/margins": 0.09692993015050888, "rewards/rejected": -0.16481932997703552, "step": 12950 }, { "epoch": 0.9842415037022973, "grad_norm": 2.398113871649109, "learning_rate": 7.027283689263065e-07, "log_odds_chosen": 1.4758789539337158, "log_odds_ratio": -0.4195312559604645, "logits/chosen": -1.280859351158142, "logits/rejected": -1.1404297351837158, "logps/chosen": -0.647167980670929, "logps/rejected": -1.705078125, "loss": 0.7956, "nll_loss": 0.779589831829071, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.06473388522863388, "rewards/margins": 0.10579834133386612, "rewards/rejected": -0.17062988877296448, "step": 12960 }, { "epoch": 0.9850009493070059, "grad_norm": 1.8812221012795658, "learning_rate": 7.024574113804996e-07, "log_odds_chosen": 1.7060546875, "log_odds_ratio": -0.39453125, "logits/chosen": -1.268164038658142, "logits/rejected": -1.097070336341858, "logps/chosen": -0.703125, "logps/rejected": -1.9718749523162842, "loss": 0.7856, "nll_loss": 0.7445312738418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07034911960363388, "rewards/margins": 0.12674561142921448, "rewards/rejected": -0.19711914658546448, "step": 12970 }, { "epoch": 0.9857603949117144, "grad_norm": 2.6374771131184414, "learning_rate": 7.021867670200857e-07, "log_odds_chosen": 1.375097632408142, "log_odds_ratio": -0.4705566465854645, "logits/chosen": -1.2970702648162842, "logits/rejected": -1.165624976158142, "logps/chosen": -0.7476562261581421, "logps/rejected": -1.735937476158142, "loss": 0.7813, "nll_loss": 0.739941418170929, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07480468600988388, "rewards/margins": 0.09877929836511612, "rewards/rejected": -0.17365722358226776, "step": 12980 }, { "epoch": 0.986519840516423, "grad_norm": 1.9503344107506733, "learning_rate": 7.019164352422057e-07, "log_odds_chosen": 1.551171898841858, "log_odds_ratio": -0.4036621153354645, "logits/chosen": -1.2136719226837158, "logits/rejected": -1.0519530773162842, "logps/chosen": -0.7909179925918579, "logps/rejected": -1.9230468273162842, "loss": 0.7961, "nll_loss": 0.8065429925918579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07918701320886612, "rewards/margins": 0.1131591796875, "rewards/rejected": -0.19243164360523224, "step": 12990 }, { "epoch": 0.9872792861211316, "grad_norm": 1.707434520228766, "learning_rate": 7.016464154456234e-07, "log_odds_chosen": 1.5421874523162842, "log_odds_ratio": -0.40410155057907104, "logits/chosen": -1.3367187976837158, "logits/rejected": -1.112890601158142, "logps/chosen": -0.6919921636581421, "logps/rejected": -1.7628905773162842, "loss": 0.7624, "nll_loss": 0.7240234613418579, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06920166313648224, "rewards/margins": 0.10698242485523224, "rewards/rejected": -0.17636719346046448, "step": 13000 }, { "epoch": 0.9880387317258401, "grad_norm": 2.335946719274414, "learning_rate": 7.013767070307207e-07, "log_odds_chosen": 1.4328124523162842, "log_odds_ratio": -0.4361816346645355, "logits/chosen": -1.244140625, "logits/rejected": -1.114843726158142, "logps/chosen": -0.720410168170929, "logps/rejected": -1.76953125, "loss": 0.8035, "nll_loss": 0.778027355670929, "rewards/accuracies": 0.75, "rewards/chosen": -0.07200928032398224, "rewards/margins": 0.10491027683019638, "rewards/rejected": -0.17695312201976776, "step": 13010 }, { "epoch": 0.9887981773305486, "grad_norm": 2.0369612579336516, "learning_rate": 7.011073093994919e-07, "log_odds_chosen": 1.45855712890625, "log_odds_ratio": -0.43226319551467896, "logits/chosen": -1.337499976158142, "logits/rejected": -1.122656226158142, "logps/chosen": -0.7542968988418579, "logps/rejected": -1.8289062976837158, "loss": 0.7737, "nll_loss": 0.7894531488418579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.075439453125, "rewards/margins": 0.10755614936351776, "rewards/rejected": -0.18281249701976776, "step": 13020 }, { "epoch": 0.9895576229352573, "grad_norm": 2.2688951844202143, "learning_rate": 7.008382219555372e-07, "log_odds_chosen": 1.6408202648162842, "log_odds_ratio": -0.3875488340854645, "logits/chosen": -1.2216796875, "logits/rejected": -1.0583984851837158, "logps/chosen": -0.7251952886581421, "logps/rejected": -1.9500000476837158, "loss": 0.7826, "nll_loss": 0.7992187738418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07254638522863388, "rewards/margins": 0.12258300930261612, "rewards/rejected": -0.19533690810203552, "step": 13030 }, { "epoch": 0.9903170685399658, "grad_norm": 2.6264075435058416, "learning_rate": 7.005694441040588e-07, "log_odds_chosen": 1.7414062023162842, "log_odds_ratio": -0.3316406309604645, "logits/chosen": -1.373437523841858, "logits/rejected": -1.0861327648162842, "logps/chosen": -0.6529296636581421, "logps/rejected": -1.8503906726837158, "loss": 0.774, "nll_loss": 0.669921875, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.06527099758386612, "rewards/margins": 0.11977539211511612, "rewards/rejected": -0.18505859375, "step": 13040 }, { "epoch": 0.9910765141446743, "grad_norm": 1.7451825500664833, "learning_rate": 7.003009752518536e-07, "log_odds_chosen": 1.6716797351837158, "log_odds_ratio": -0.4100097715854645, "logits/chosen": -1.261328101158142, "logits/rejected": -1.090234398841858, "logps/chosen": -0.718554675579071, "logps/rejected": -1.9757812023162842, "loss": 0.7748, "nll_loss": 0.7637695074081421, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07192382961511612, "rewards/margins": 0.125732421875, "rewards/rejected": -0.19748535752296448, "step": 13050 }, { "epoch": 0.991835959749383, "grad_norm": 3.029694389166155, "learning_rate": 7.000328148073091e-07, "log_odds_chosen": 1.6096680164337158, "log_odds_ratio": -0.38774412870407104, "logits/chosen": -1.3097655773162842, "logits/rejected": -1.112890601158142, "logps/chosen": -0.6742187738418579, "logps/rejected": -1.8605468273162842, "loss": 0.792, "nll_loss": 0.753710925579071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06745605170726776, "rewards/margins": 0.11864013969898224, "rewards/rejected": -0.18605956435203552, "step": 13060 }, { "epoch": 0.9925954053540915, "grad_norm": 2.183430005340469, "learning_rate": 6.997649621803973e-07, "log_odds_chosen": 1.7373046875, "log_odds_ratio": -0.35795897245407104, "logits/chosen": -1.330078125, "logits/rejected": -1.1658203601837158, "logps/chosen": -0.6039062738418579, "logps/rejected": -1.829687476158142, "loss": 0.7558, "nll_loss": 0.689746081829071, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.06036376953125, "rewards/margins": 0.12248535454273224, "rewards/rejected": -0.18293456733226776, "step": 13070 }, { "epoch": 0.9933548509588, "grad_norm": 2.1102049790539734, "learning_rate": 6.994974167826689e-07, "log_odds_chosen": 1.662695288658142, "log_odds_ratio": -0.404052734375, "logits/chosen": -1.289453148841858, "logits/rejected": -1.046484351158142, "logps/chosen": -0.7132812738418579, "logps/rejected": -1.94140625, "loss": 0.7577, "nll_loss": 0.7396484613418579, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07132568210363388, "rewards/margins": 0.12286376953125, "rewards/rejected": -0.194091796875, "step": 13080 }, { "epoch": 0.9941142965635087, "grad_norm": 2.014050293674789, "learning_rate": 6.99230178027249e-07, "log_odds_chosen": 1.853124976158142, "log_odds_ratio": -0.3688720762729645, "logits/chosen": -1.2462890148162842, "logits/rejected": -1.0896484851837158, "logps/chosen": -0.64453125, "logps/rejected": -2.005078077316284, "loss": 0.7398, "nll_loss": 0.686328113079071, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.06442870944738388, "rewards/margins": 0.13601073622703552, "rewards/rejected": -0.20048828423023224, "step": 13090 }, { "epoch": 0.9948737421682172, "grad_norm": 2.3982605506521404, "learning_rate": 6.989632453288303e-07, "log_odds_chosen": 1.6642577648162842, "log_odds_ratio": -0.3695312440395355, "logits/chosen": -1.3408203125, "logits/rejected": -1.1697266101837158, "logps/chosen": -0.6986328363418579, "logps/rejected": -1.8533203601837158, "loss": 0.7922, "nll_loss": 0.7593749761581421, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06990966945886612, "rewards/margins": 0.11540527641773224, "rewards/rejected": -0.18522949516773224, "step": 13100 }, { "epoch": 0.9956331877729258, "grad_norm": 2.311373784876927, "learning_rate": 6.98696618103669e-07, "log_odds_chosen": 1.9421875476837158, "log_odds_ratio": -0.298583984375, "logits/chosen": -1.2275390625, "logits/rejected": -1.010156273841858, "logps/chosen": -0.612988293170929, "logps/rejected": -1.987890601158142, "loss": 0.7541, "nll_loss": 0.6744140386581421, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.061279296875, "rewards/margins": 0.13756103813648224, "rewards/rejected": -0.19877929985523224, "step": 13110 }, { "epoch": 0.9963926333776343, "grad_norm": 1.8857516751864365, "learning_rate": 6.984302957695782e-07, "log_odds_chosen": 1.632421851158142, "log_odds_ratio": -0.37812501192092896, "logits/chosen": -1.2998046875, "logits/rejected": -1.158203125, "logps/chosen": -0.6826171875, "logps/rejected": -1.826171875, "loss": 0.7778, "nll_loss": 0.6957031488418579, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.06826172024011612, "rewards/margins": 0.11445312201976776, "rewards/rejected": -0.18271484971046448, "step": 13120 }, { "epoch": 0.9971520789823429, "grad_norm": 1.9895817577813075, "learning_rate": 6.981642777459237e-07, "log_odds_chosen": 1.723242163658142, "log_odds_ratio": -0.4080566465854645, "logits/chosen": -1.2599608898162842, "logits/rejected": -1.0283203125, "logps/chosen": -0.716015636920929, "logps/rejected": -1.987890601158142, "loss": 0.7849, "nll_loss": 0.7406250238418579, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07159423828125, "rewards/margins": 0.12709960341453552, "rewards/rejected": -0.19875487685203552, "step": 13130 }, { "epoch": 0.9979115245870515, "grad_norm": 2.4193303625505784, "learning_rate": 6.978985634536182e-07, "log_odds_chosen": 1.71240234375, "log_odds_ratio": -0.37993162870407104, "logits/chosen": -1.200781226158142, "logits/rejected": -1.034570336341858, "logps/chosen": -0.668261706829071, "logps/rejected": -1.9025390148162842, "loss": 0.7761, "nll_loss": 0.772167980670929, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.06680908054113388, "rewards/margins": 0.12327270209789276, "rewards/rejected": -0.19008788466453552, "step": 13140 }, { "epoch": 0.99867097019176, "grad_norm": 2.1077303208973404, "learning_rate": 6.976331523151157e-07, "log_odds_chosen": 1.6135742664337158, "log_odds_ratio": -0.37255859375, "logits/chosen": -1.328710913658142, "logits/rejected": -1.1521484851837158, "logps/chosen": -0.614062488079071, "logps/rejected": -1.7199218273162842, "loss": 0.7979, "nll_loss": 0.6620117425918579, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.06143798679113388, "rewards/margins": 0.11043091118335724, "rewards/rejected": -0.17196044325828552, "step": 13150 }, { "epoch": 0.9994304157964686, "grad_norm": 2.22718540382866, "learning_rate": 6.973680437544066e-07, "log_odds_chosen": 1.8894531726837158, "log_odds_ratio": -0.3351074159145355, "logits/chosen": -1.277929663658142, "logits/rejected": -1.1123046875, "logps/chosen": -0.664257824420929, "logps/rejected": -2.07421875, "loss": 0.7515, "nll_loss": 0.715136706829071, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06636963039636612, "rewards/margins": 0.14105224609375, "rewards/rejected": -0.2073974609375, "step": 13160 }, { "epoch": 0.9999620277197646, "step": 13167, "total_flos": 0.0, "train_loss": 0.0, "train_runtime": 20.5184, "train_samples_per_second": 82142.017, "train_steps_per_second": 641.716 } ], "logging_steps": 10, "max_steps": 13167, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }