{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-09, "logits/chosen": -2.7378978729248047, "logits/rejected": -2.7575535774230957, "logps/chosen": -31.56254005432129, "logps/rejected": -34.877418518066406, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 2e-08, "logits/chosen": -2.8437275886535645, "logits/rejected": -2.8519980907440186, "logps/chosen": -31.87986183166504, "logps/rejected": -35.95986557006836, "loss": 0.6932, "rewards/accuracies": 0.4305555522441864, "rewards/chosen": 9.270243026548997e-06, "rewards/margins": -0.00017068462329916656, "rewards/rejected": 0.00017995487723965198, "step": 10 }, { "epoch": 0.0, "learning_rate": 4e-08, "logits/chosen": -2.851492404937744, "logits/rejected": -2.860441207885742, "logps/chosen": -31.55997085571289, "logps/rejected": -35.41954803466797, "loss": 0.6933, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": 0.0002020239771809429, "rewards/margins": -0.00022845840430818498, "rewards/rejected": 0.0004304823523852974, "step": 20 }, { "epoch": 0.0, "learning_rate": 6.000000000000001e-08, "logits/chosen": -2.8377509117126465, "logits/rejected": -2.8477494716644287, "logps/chosen": -31.6408748626709, "logps/rejected": -35.3211784362793, "loss": 0.6931, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0001960716035682708, "rewards/margins": 4.014970545540564e-05, "rewards/rejected": 0.00015592190902680159, "step": 30 }, { "epoch": 0.0, "learning_rate": 8e-08, "logits/chosen": -2.8091392517089844, "logits/rejected": -2.8211140632629395, "logps/chosen": -31.450403213500977, "logps/rejected": -34.92453384399414, "loss": 0.6932, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.0006295361672528088, "rewards/margins": -8.136746328091249e-05, "rewards/rejected": -0.0005481686675921082, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.0000000000000001e-07, "logits/chosen": -2.887484073638916, "logits/rejected": -2.893914222717285, "logps/chosen": -31.657039642333984, "logps/rejected": -35.211997985839844, "loss": 0.6932, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.00028370239306241274, "rewards/margins": -4.4233085645828396e-05, "rewards/rejected": -0.00023946927103679627, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.2000000000000002e-07, "logits/chosen": -2.8394775390625, "logits/rejected": -2.847330093383789, "logps/chosen": -31.739593505859375, "logps/rejected": -35.61301803588867, "loss": 0.693, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -7.67548190196976e-05, "rewards/margins": 0.00026599931879900396, "rewards/rejected": -0.00034275412326678634, "step": 60 }, { "epoch": 0.0, "learning_rate": 1.4e-07, "logits/chosen": -2.8610146045684814, "logits/rejected": -2.8706271648406982, "logps/chosen": -31.541015625, "logps/rejected": -34.93061065673828, "loss": 0.6928, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.00025295853265561163, "rewards/margins": 0.0006930510280653834, "rewards/rejected": -0.00044009252451360226, "step": 70 }, { "epoch": 0.0, "learning_rate": 1.6e-07, "logits/chosen": -2.814054489135742, "logits/rejected": -2.8265459537506104, "logps/chosen": -31.53386878967285, "logps/rejected": -35.178977966308594, "loss": 0.6923, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.00021788975573144853, "rewards/margins": 0.001674074330367148, "rewards/rejected": -0.001891964115202427, "step": 80 }, { "epoch": 0.0, "learning_rate": 1.8e-07, "logits/chosen": -2.862257480621338, "logits/rejected": -2.8696205615997314, "logps/chosen": -31.788599014282227, "logps/rejected": -35.600608825683594, "loss": 0.6922, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.00026896881172433496, "rewards/margins": 0.0019238230306655169, "rewards/rejected": -0.002192791784182191, "step": 90 }, { "epoch": 0.0, "learning_rate": 2.0000000000000002e-07, "logits/chosen": -2.8508925437927246, "logits/rejected": -2.857975482940674, "logps/chosen": -31.689062118530273, "logps/rejected": -35.67528533935547, "loss": 0.6917, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.0003110704419668764, "rewards/margins": 0.002977342577651143, "rewards/rejected": -0.0032884131651371717, "step": 100 }, { "epoch": 0.0, "eval_logits/chosen": -2.8907737731933594, "eval_logits/rejected": -2.8963623046875, "eval_logps/chosen": -31.672496795654297, "eval_logps/rejected": -36.563232421875, "eval_loss": 0.6904774308204651, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.00012544631317723542, "eval_rewards/margins": 0.005351169966161251, "eval_rewards/rejected": -0.0054766153916716576, "eval_runtime": 2.5381, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 100 }, { "epoch": 0.0, "learning_rate": 2.2e-07, "logits/chosen": -2.840686321258545, "logits/rejected": -2.851572275161743, "logps/chosen": -31.843679428100586, "logps/rejected": -35.9117317199707, "loss": 0.6909, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.00046255686902441084, "rewards/margins": 0.004524545278400183, "rewards/rejected": -0.004987102933228016, "step": 110 }, { "epoch": 0.0, "learning_rate": 2.4000000000000003e-07, "logits/chosen": -2.851132392883301, "logits/rejected": -2.8620097637176514, "logps/chosen": -31.579242706298828, "logps/rejected": -35.718963623046875, "loss": 0.69, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.0002421905955998227, "rewards/margins": 0.006371693219989538, "rewards/rejected": -0.006613883189857006, "step": 120 }, { "epoch": 0.01, "learning_rate": 2.6e-07, "logits/chosen": -2.882373571395874, "logits/rejected": -2.88938570022583, "logps/chosen": -31.985538482666016, "logps/rejected": -36.466407775878906, "loss": 0.6888, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.0002875800128094852, "rewards/margins": 0.008631178177893162, "rewards/rejected": -0.008918757550418377, "step": 130 }, { "epoch": 0.01, "learning_rate": 2.8e-07, "logits/chosen": -2.865797519683838, "logits/rejected": -2.872168779373169, "logps/chosen": -31.894283294677734, "logps/rejected": -36.985172271728516, "loss": 0.6877, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.0007256481912918389, "rewards/margins": 0.010884960182011127, "rewards/rejected": -0.011610607616603374, "step": 140 }, { "epoch": 0.01, "learning_rate": 3.0000000000000004e-07, "logits/chosen": -2.845139980316162, "logits/rejected": -2.853494644165039, "logps/chosen": -31.675251007080078, "logps/rejected": -36.79846954345703, "loss": 0.6865, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0007051869761198759, "rewards/margins": 0.013261301442980766, "rewards/rejected": -0.013966487720608711, "step": 150 }, { "epoch": 0.01, "learning_rate": 3.2e-07, "logits/chosen": -2.8281171321868896, "logits/rejected": -2.8388850688934326, "logps/chosen": -31.937744140625, "logps/rejected": -37.26773452758789, "loss": 0.6852, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0008424732950516045, "rewards/margins": 0.015992898494005203, "rewards/rejected": -0.016835371032357216, "step": 160 }, { "epoch": 0.01, "learning_rate": 3.4000000000000003e-07, "logits/chosen": -2.878615617752075, "logits/rejected": -2.885582447052002, "logps/chosen": -31.896270751953125, "logps/rejected": -37.909324645996094, "loss": 0.6833, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.00010471078712726012, "rewards/margins": 0.01982172206044197, "rewards/rejected": -0.019717011600732803, "step": 170 }, { "epoch": 0.01, "learning_rate": 3.6e-07, "logits/chosen": -2.871368646621704, "logits/rejected": -2.879415512084961, "logps/chosen": -31.864959716796875, "logps/rejected": -38.27887725830078, "loss": 0.6803, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.0006934488192200661, "rewards/margins": 0.025849204510450363, "rewards/rejected": -0.02515575848519802, "step": 180 }, { "epoch": 0.01, "learning_rate": 3.8e-07, "logits/chosen": -2.8525795936584473, "logits/rejected": -2.860992908477783, "logps/chosen": -31.651220321655273, "logps/rejected": -38.335655212402344, "loss": 0.68, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.0003419060376472771, "rewards/margins": 0.026519078761339188, "rewards/rejected": -0.026177171617746353, "step": 190 }, { "epoch": 0.01, "learning_rate": 4.0000000000000003e-07, "logits/chosen": -2.8633503913879395, "logits/rejected": -2.872957229614258, "logps/chosen": -31.91057777404785, "logps/rejected": -38.78657531738281, "loss": 0.6767, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.0017606725450605154, "rewards/margins": 0.03333817049860954, "rewards/rejected": -0.031577497720718384, "step": 200 }, { "epoch": 0.01, "eval_logits/chosen": -2.889622926712036, "eval_logits/rejected": -2.8965280055999756, "eval_logps/chosen": -31.5100040435791, "eval_logps/rejected": -39.384334564208984, "eval_loss": 0.6757729649543762, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.0014994887169450521, "eval_rewards/margins": 0.03518717363476753, "eval_rewards/rejected": -0.03368768468499184, "eval_runtime": 2.5589, "eval_samples_per_second": 1.954, "eval_steps_per_second": 0.391, "step": 200 }, { "epoch": 0.01, "learning_rate": 4.2000000000000006e-07, "logits/chosen": -2.8569555282592773, "logits/rejected": -2.866086721420288, "logps/chosen": -31.34256362915039, "logps/rejected": -38.8200798034668, "loss": 0.6751, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.003228578018024564, "rewards/margins": 0.036589138209819794, "rewards/rejected": -0.0333605632185936, "step": 210 }, { "epoch": 0.01, "learning_rate": 4.4e-07, "logits/chosen": -2.8553261756896973, "logits/rejected": -2.8630785942077637, "logps/chosen": -31.168132781982422, "logps/rejected": -39.94483184814453, "loss": 0.6714, "rewards/accuracies": 0.9375, "rewards/chosen": 0.0065407766960561275, "rewards/margins": 0.044131867587566376, "rewards/rejected": -0.03759108856320381, "step": 220 }, { "epoch": 0.01, "learning_rate": 4.6000000000000004e-07, "logits/chosen": -2.846247434616089, "logits/rejected": -2.856968402862549, "logps/chosen": -30.52225112915039, "logps/rejected": -39.384674072265625, "loss": 0.6672, "rewards/accuracies": 0.9375, "rewards/chosen": 0.010297889821231365, "rewards/margins": 0.052807487547397614, "rewards/rejected": -0.04250960052013397, "step": 230 }, { "epoch": 0.01, "learning_rate": 4.800000000000001e-07, "logits/chosen": -2.837066650390625, "logits/rejected": -2.8476226329803467, "logps/chosen": -30.318273544311523, "logps/rejected": -40.50540542602539, "loss": 0.662, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.014961367473006248, "rewards/margins": 0.0633385181427002, "rewards/rejected": -0.0483771488070488, "step": 240 }, { "epoch": 0.01, "learning_rate": 5.000000000000001e-07, "logits/chosen": -2.8326704502105713, "logits/rejected": -2.8433525562286377, "logps/chosen": -29.7247314453125, "logps/rejected": -40.64832305908203, "loss": 0.6583, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.021330002695322037, "rewards/margins": 0.07122501730918884, "rewards/rejected": -0.049895014613866806, "step": 250 }, { "epoch": 0.01, "learning_rate": 5.2e-07, "logits/chosen": -2.8418846130371094, "logits/rejected": -2.8497989177703857, "logps/chosen": -28.69199562072754, "logps/rejected": -40.495052337646484, "loss": 0.6524, "rewards/accuracies": 0.9375, "rewards/chosen": 0.03087989054620266, "rewards/margins": 0.08361528068780899, "rewards/rejected": -0.052735380828380585, "step": 260 }, { "epoch": 0.01, "learning_rate": 5.4e-07, "logits/chosen": -2.8455681800842285, "logits/rejected": -2.8548922538757324, "logps/chosen": -27.655506134033203, "logps/rejected": -40.262977600097656, "loss": 0.6516, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 0.03986174613237381, "rewards/margins": 0.08565817773342133, "rewards/rejected": -0.04579642042517662, "step": 270 }, { "epoch": 0.01, "learning_rate": 5.6e-07, "logits/chosen": -2.840118169784546, "logits/rejected": -2.8523902893066406, "logps/chosen": -25.311412811279297, "logps/rejected": -40.77899932861328, "loss": 0.6366, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.06186418607831001, "rewards/margins": 0.11726684868335724, "rewards/rejected": -0.05540265515446663, "step": 280 }, { "epoch": 0.01, "learning_rate": 5.800000000000001e-07, "logits/chosen": -2.837265729904175, "logits/rejected": -2.8484153747558594, "logps/chosen": -23.158449172973633, "logps/rejected": -40.87108612060547, "loss": 0.6245, "rewards/accuracies": 0.9375, "rewards/chosen": 0.08424028009176254, "rewards/margins": 0.14348194003105164, "rewards/rejected": -0.0592416450381279, "step": 290 }, { "epoch": 0.01, "learning_rate": 6.000000000000001e-07, "logits/chosen": -2.851079225540161, "logits/rejected": -2.8617031574249268, "logps/chosen": -21.73841667175293, "logps/rejected": -41.17694854736328, "loss": 0.6195, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.09914528578519821, "rewards/margins": 0.15509627759456635, "rewards/rejected": -0.055950991809368134, "step": 300 }, { "epoch": 0.01, "eval_logits/chosen": -2.889470100402832, "eval_logits/rejected": -2.897376298904419, "eval_logps/chosen": -22.866273880004883, "eval_logps/rejected": -40.901634216308594, "eval_loss": 0.628918468952179, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.08793680369853973, "eval_rewards/margins": 0.1367974728345871, "eval_rewards/rejected": -0.04886067658662796, "eval_runtime": 2.5403, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 300 }, { "epoch": 0.01, "learning_rate": 6.200000000000001e-07, "logits/chosen": -2.8426337242126465, "logits/rejected": -2.8550333976745605, "logps/chosen": -20.176361083984375, "logps/rejected": -42.40431594848633, "loss": 0.6051, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.11432721465826035, "rewards/margins": 0.18596036732196808, "rewards/rejected": -0.07163316011428833, "step": 310 }, { "epoch": 0.01, "learning_rate": 6.4e-07, "logits/chosen": -2.851113796234131, "logits/rejected": -2.8631577491760254, "logps/chosen": -19.8941707611084, "logps/rejected": -43.91278839111328, "loss": 0.5989, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.1177489310503006, "rewards/margins": 0.20075125992298126, "rewards/rejected": -0.08300231397151947, "step": 320 }, { "epoch": 0.01, "learning_rate": 6.6e-07, "logits/chosen": -2.8265151977539062, "logits/rejected": -2.8408312797546387, "logps/chosen": -18.866727828979492, "logps/rejected": -46.19036865234375, "loss": 0.5823, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.12779247760772705, "rewards/margins": 0.23716330528259277, "rewards/rejected": -0.10937084257602692, "step": 330 }, { "epoch": 0.01, "learning_rate": 6.800000000000001e-07, "logits/chosen": -2.848740339279175, "logits/rejected": -2.8619203567504883, "logps/chosen": -19.625049591064453, "logps/rejected": -45.49297332763672, "loss": 0.5911, "rewards/accuracies": 0.875, "rewards/chosen": 0.12091821432113647, "rewards/margins": 0.22173161804676056, "rewards/rejected": -0.10081341117620468, "step": 340 }, { "epoch": 0.01, "learning_rate": 7.000000000000001e-07, "logits/chosen": -2.830193042755127, "logits/rejected": -2.8451104164123535, "logps/chosen": -17.979398727416992, "logps/rejected": -48.468345642089844, "loss": 0.57, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.13713839650154114, "rewards/margins": 0.2683044373989105, "rewards/rejected": -0.1311660259962082, "step": 350 }, { "epoch": 0.01, "learning_rate": 7.2e-07, "logits/chosen": -2.8458235263824463, "logits/rejected": -2.8601250648498535, "logps/chosen": -16.797710418701172, "logps/rejected": -49.2154541015625, "loss": 0.5641, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.14685463905334473, "rewards/margins": 0.2838887572288513, "rewards/rejected": -0.13703413307666779, "step": 360 }, { "epoch": 0.01, "learning_rate": 7.4e-07, "logits/chosen": -2.8137898445129395, "logits/rejected": -2.828002452850342, "logps/chosen": -16.245784759521484, "logps/rejected": -49.996665954589844, "loss": 0.5595, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.15269091725349426, "rewards/margins": 0.29616498947143555, "rewards/rejected": -0.1434740573167801, "step": 370 }, { "epoch": 0.02, "learning_rate": 7.6e-07, "logits/chosen": -2.8495373725891113, "logits/rejected": -2.8643441200256348, "logps/chosen": -14.146245956420898, "logps/rejected": -53.251609802246094, "loss": 0.5357, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.17447564005851746, "rewards/margins": 0.352342426776886, "rewards/rejected": -0.17786678671836853, "step": 380 }, { "epoch": 0.02, "learning_rate": 7.8e-07, "logits/chosen": -2.8257575035095215, "logits/rejected": -2.841547727584839, "logps/chosen": -13.256139755249023, "logps/rejected": -54.307098388671875, "loss": 0.5272, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.18534918129444122, "rewards/margins": 0.37510982155799866, "rewards/rejected": -0.18976061046123505, "step": 390 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-07, "logits/chosen": -2.8352346420288086, "logits/rejected": -2.851823329925537, "logps/chosen": -12.786114692687988, "logps/rejected": -54.5097770690918, "loss": 0.5265, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.18759022653102875, "rewards/margins": 0.38273242115974426, "rewards/rejected": -0.19514216482639313, "step": 400 }, { "epoch": 0.02, "eval_logits/chosen": -2.8791277408599854, "eval_logits/rejected": -2.8898181915283203, "eval_logps/chosen": -15.178817749023438, "eval_logps/rejected": -53.76172637939453, "eval_loss": 0.5498184561729431, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.16481134295463562, "eval_rewards/margins": 0.3422728478908539, "eval_rewards/rejected": -0.17746153473854065, "eval_runtime": 2.5414, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 400 }, { "epoch": 0.02, "learning_rate": 8.200000000000001e-07, "logits/chosen": -2.8354573249816895, "logits/rejected": -2.8512320518493652, "logps/chosen": -12.816737174987793, "logps/rejected": -55.143836975097656, "loss": 0.5282, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.1893976926803589, "rewards/margins": 0.38598987460136414, "rewards/rejected": -0.19659221172332764, "step": 410 }, { "epoch": 0.02, "learning_rate": 8.400000000000001e-07, "logits/chosen": -2.8391196727752686, "logits/rejected": -2.858541250228882, "logps/chosen": -8.85454273223877, "logps/rejected": -60.33222579956055, "loss": 0.4881, "rewards/accuracies": 0.9375, "rewards/chosen": 0.22644583880901337, "rewards/margins": 0.47693929076194763, "rewards/rejected": -0.25049346685409546, "step": 420 }, { "epoch": 0.02, "learning_rate": 8.6e-07, "logits/chosen": -2.8603148460388184, "logits/rejected": -2.8742501735687256, "logps/chosen": -10.87490463256836, "logps/rejected": -59.370506286621094, "loss": 0.5057, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.21016235649585724, "rewards/margins": 0.44817495346069336, "rewards/rejected": -0.23801259696483612, "step": 430 }, { "epoch": 0.02, "learning_rate": 8.8e-07, "logits/chosen": -2.774691104888916, "logits/rejected": -2.802006721496582, "logps/chosen": -6.1744160652160645, "logps/rejected": -67.0235824584961, "loss": 0.4525, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.25415223836898804, "rewards/margins": 0.572658896446228, "rewards/rejected": -0.3185066282749176, "step": 440 }, { "epoch": 0.02, "learning_rate": 9.000000000000001e-07, "logits/chosen": -2.7922871112823486, "logits/rejected": -2.8160386085510254, "logps/chosen": -7.116026878356934, "logps/rejected": -67.9571533203125, "loss": 0.4577, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.2449255734682083, "rewards/margins": 0.5689839124679565, "rewards/rejected": -0.3240583837032318, "step": 450 }, { "epoch": 0.02, "learning_rate": 9.200000000000001e-07, "logits/chosen": -2.836318254470825, "logits/rejected": -2.855250835418701, "logps/chosen": -5.783654689788818, "logps/rejected": -71.7186279296875, "loss": 0.4367, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2601434588432312, "rewards/margins": 0.6253237128257751, "rewards/rejected": -0.36518028378486633, "step": 460 }, { "epoch": 0.02, "learning_rate": 9.400000000000001e-07, "logits/chosen": -2.844541549682617, "logits/rejected": -2.866375207901001, "logps/chosen": -5.991224765777588, "logps/rejected": -74.46247863769531, "loss": 0.4309, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.2559830844402313, "rewards/margins": 0.6459441184997559, "rewards/rejected": -0.38996103405952454, "step": 470 }, { "epoch": 0.02, "learning_rate": 9.600000000000001e-07, "logits/chosen": -2.8035550117492676, "logits/rejected": -2.82908296585083, "logps/chosen": -4.389423847198486, "logps/rejected": -79.68538665771484, "loss": 0.4047, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2741895616054535, "rewards/margins": 0.7141346335411072, "rewards/rejected": -0.4399450719356537, "step": 480 }, { "epoch": 0.02, "learning_rate": 9.800000000000001e-07, "logits/chosen": -2.812251329421997, "logits/rejected": -2.833768844604492, "logps/chosen": -6.338814735412598, "logps/rejected": -78.16831970214844, "loss": 0.4229, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.25374746322631836, "rewards/margins": 0.682456374168396, "rewards/rejected": -0.42870885133743286, "step": 490 }, { "epoch": 0.02, "learning_rate": 1.0000000000000002e-06, "logits/chosen": -2.833005905151367, "logits/rejected": -2.8491594791412354, "logps/chosen": -7.892645359039307, "logps/rejected": -80.2509994506836, "loss": 0.4265, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.23966047167778015, "rewards/margins": 0.6854254603385925, "rewards/rejected": -0.44576495885849, "step": 500 }, { "epoch": 0.02, "eval_logits/chosen": -2.8704168796539307, "eval_logits/rejected": -2.8837385177612305, "eval_logps/chosen": -13.660011291503906, "eval_logps/rejected": -74.49039459228516, "eval_loss": 0.4915219843387604, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.17999939620494843, "eval_rewards/margins": 0.5647476315498352, "eval_rewards/rejected": -0.3847481906414032, "eval_runtime": 2.5431, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 500 }, { "epoch": 0.02, "learning_rate": 1.02e-06, "logits/chosen": -2.840994119644165, "logits/rejected": -2.8585689067840576, "logps/chosen": -5.797226905822754, "logps/rejected": -84.76994323730469, "loss": 0.4006, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.2596047520637512, "rewards/margins": 0.7512787580490112, "rewards/rejected": -0.49167400598526, "step": 510 }, { "epoch": 0.02, "learning_rate": 1.04e-06, "logits/chosen": -2.8017475605010986, "logits/rejected": -2.8251404762268066, "logps/chosen": -8.294705390930176, "logps/rejected": -84.1483383178711, "loss": 0.4167, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.23337304592132568, "rewards/margins": 0.7237992286682129, "rewards/rejected": -0.4904262125492096, "step": 520 }, { "epoch": 0.02, "learning_rate": 1.06e-06, "logits/chosen": -2.810163974761963, "logits/rejected": -2.834770679473877, "logps/chosen": -4.533754825592041, "logps/rejected": -90.81745910644531, "loss": 0.3737, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.27057787775993347, "rewards/margins": 0.8292282819747925, "rewards/rejected": -0.5586503744125366, "step": 530 }, { "epoch": 0.02, "learning_rate": 1.08e-06, "logits/chosen": -2.852900981903076, "logits/rejected": -2.869657516479492, "logps/chosen": -7.05509090423584, "logps/rejected": -91.04242706298828, "loss": 0.3906, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.2466249167919159, "rewards/margins": 0.8031465411186218, "rewards/rejected": -0.5565215945243835, "step": 540 }, { "epoch": 0.02, "learning_rate": 1.1e-06, "logits/chosen": -2.8418619632720947, "logits/rejected": -2.8629984855651855, "logps/chosen": -2.9332709312438965, "logps/rejected": -98.77378845214844, "loss": 0.3388, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2866368293762207, "rewards/margins": 0.9234557151794434, "rewards/rejected": -0.636819064617157, "step": 550 }, { "epoch": 0.02, "learning_rate": 1.12e-06, "logits/chosen": -2.82724666595459, "logits/rejected": -2.84657621383667, "logps/chosen": -5.234626293182373, "logps/rejected": -98.744140625, "loss": 0.3571, "rewards/accuracies": 0.9375, "rewards/chosen": 0.26350724697113037, "rewards/margins": 0.898236870765686, "rewards/rejected": -0.6347296237945557, "step": 560 }, { "epoch": 0.02, "learning_rate": 1.14e-06, "logits/chosen": -2.8091483116149902, "logits/rejected": -2.8304107189178467, "logps/chosen": -5.249286651611328, "logps/rejected": -101.9556655883789, "loss": 0.3498, "rewards/accuracies": 0.9375, "rewards/chosen": 0.2657969892024994, "rewards/margins": 0.9273422360420227, "rewards/rejected": -0.6615451574325562, "step": 570 }, { "epoch": 0.02, "learning_rate": 1.1600000000000001e-06, "logits/chosen": -2.790942907333374, "logits/rejected": -2.8113741874694824, "logps/chosen": -5.275379657745361, "logps/rejected": -104.60810852050781, "loss": 0.3415, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.26571571826934814, "rewards/margins": 0.9537063837051392, "rewards/rejected": -0.6879907250404358, "step": 580 }, { "epoch": 0.02, "learning_rate": 1.1800000000000001e-06, "logits/chosen": -2.780268907546997, "logits/rejected": -2.8024113178253174, "logps/chosen": -2.439600706100464, "logps/rejected": -110.0733413696289, "loss": 0.3064, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29178616404533386, "rewards/margins": 1.0446232557296753, "rewards/rejected": -0.7528371214866638, "step": 590 }, { "epoch": 0.02, "learning_rate": 1.2000000000000002e-06, "logits/chosen": -2.7821903228759766, "logits/rejected": -2.802605628967285, "logps/chosen": -9.826620101928711, "logps/rejected": -102.77278900146484, "loss": 0.3822, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.2189265787601471, "rewards/margins": 0.8913952708244324, "rewards/rejected": -0.6724687814712524, "step": 600 }, { "epoch": 0.02, "eval_logits/chosen": -2.8638622760772705, "eval_logits/rejected": -2.8766462802886963, "eval_logps/chosen": -17.306840896606445, "eval_logps/rejected": -95.9889907836914, "eval_loss": 0.4623832702636719, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.14353112876415253, "eval_rewards/margins": 0.7432652711868286, "eval_rewards/rejected": -0.5997341871261597, "eval_runtime": 2.5357, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 600 }, { "epoch": 0.02, "learning_rate": 1.2200000000000002e-06, "logits/chosen": -2.8192691802978516, "logits/rejected": -2.8391613960266113, "logps/chosen": -5.285656452178955, "logps/rejected": -112.41468811035156, "loss": 0.3261, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.26181596517562866, "rewards/margins": 1.0333595275878906, "rewards/rejected": -0.771543562412262, "step": 610 }, { "epoch": 0.02, "learning_rate": 1.2400000000000002e-06, "logits/chosen": -2.8264529705047607, "logits/rejected": -2.8439574241638184, "logps/chosen": -4.605085849761963, "logps/rejected": -114.93780517578125, "loss": 0.315, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.271725594997406, "rewards/margins": 1.0633752346038818, "rewards/rejected": -0.7916496992111206, "step": 620 }, { "epoch": 0.03, "learning_rate": 1.26e-06, "logits/chosen": -2.7877204418182373, "logits/rejected": -2.8088600635528564, "logps/chosen": -7.8699774742126465, "logps/rejected": -110.46846771240234, "loss": 0.3475, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.2378402054309845, "rewards/margins": 0.9893569946289062, "rewards/rejected": -0.7515168786048889, "step": 630 }, { "epoch": 0.03, "learning_rate": 1.28e-06, "logits/chosen": -2.837090015411377, "logits/rejected": -2.8522086143493652, "logps/chosen": -5.440021991729736, "logps/rejected": -115.82767486572266, "loss": 0.3184, "rewards/accuracies": 0.9375, "rewards/chosen": 0.2632838785648346, "rewards/margins": 1.0648844242095947, "rewards/rejected": -0.8016005754470825, "step": 640 }, { "epoch": 0.03, "learning_rate": 1.3e-06, "logits/chosen": -2.8325998783111572, "logits/rejected": -2.8476850986480713, "logps/chosen": -5.666572093963623, "logps/rejected": -114.93052673339844, "loss": 0.3222, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.26070231199264526, "rewards/margins": 1.0585978031158447, "rewards/rejected": -0.7978953719139099, "step": 650 }, { "epoch": 0.03, "learning_rate": 1.32e-06, "logits/chosen": -2.8294150829315186, "logits/rejected": -2.843332290649414, "logps/chosen": -4.656705379486084, "logps/rejected": -117.8843994140625, "loss": 0.3081, "rewards/accuracies": 0.9375, "rewards/chosen": 0.27124640345573425, "rewards/margins": 1.0952842235565186, "rewards/rejected": -0.8240379095077515, "step": 660 }, { "epoch": 0.03, "learning_rate": 1.34e-06, "logits/chosen": -2.813687562942505, "logits/rejected": -2.8287830352783203, "logps/chosen": -6.055461883544922, "logps/rejected": -117.27398681640625, "loss": 0.3218, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.25777754187583923, "rewards/margins": 1.0705653429031372, "rewards/rejected": -0.8127878308296204, "step": 670 }, { "epoch": 0.03, "learning_rate": 1.3600000000000001e-06, "logits/chosen": -2.8305511474609375, "logits/rejected": -2.847665786743164, "logps/chosen": -3.576225996017456, "logps/rejected": -123.47029113769531, "loss": 0.2861, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2802112400531769, "rewards/margins": 1.162218451499939, "rewards/rejected": -0.8820074200630188, "step": 680 }, { "epoch": 0.03, "learning_rate": 1.3800000000000001e-06, "logits/chosen": -2.8184750080108643, "logits/rejected": -2.835768699645996, "logps/chosen": -4.080394268035889, "logps/rejected": -123.95877838134766, "loss": 0.2899, "rewards/accuracies": 0.9375, "rewards/chosen": 0.27529793977737427, "rewards/margins": 1.1634957790374756, "rewards/rejected": -0.8881980180740356, "step": 690 }, { "epoch": 0.03, "learning_rate": 1.4000000000000001e-06, "logits/chosen": -2.8218908309936523, "logits/rejected": -2.836843729019165, "logps/chosen": -6.666535377502441, "logps/rejected": -122.4844741821289, "loss": 0.3164, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.24970369040966034, "rewards/margins": 1.1155344247817993, "rewards/rejected": -0.8658306002616882, "step": 700 }, { "epoch": 0.03, "eval_logits/chosen": -2.864266872406006, "eval_logits/rejected": -2.875173807144165, "eval_logps/chosen": -17.776647567749023, "eval_logps/rejected": -109.68687438964844, "eval_loss": 0.43485140800476074, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.13883306086063385, "eval_rewards/margins": 0.8755461573600769, "eval_rewards/rejected": -0.7367132306098938, "eval_runtime": 2.5367, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 700 }, { "epoch": 0.03, "learning_rate": 1.42e-06, "logits/chosen": -2.844935417175293, "logits/rejected": -2.858140707015991, "logps/chosen": -8.028076171875, "logps/rejected": -122.30613708496094, "loss": 0.3241, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.2376345843076706, "rewards/margins": 1.1100366115570068, "rewards/rejected": -0.8724021911621094, "step": 710 }, { "epoch": 0.03, "learning_rate": 1.44e-06, "logits/chosen": -2.7900023460388184, "logits/rejected": -2.8094356060028076, "logps/chosen": -3.2359237670898438, "logps/rejected": -129.42636108398438, "loss": 0.271, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2824572026729584, "rewards/margins": 1.2281405925750732, "rewards/rejected": -0.945683479309082, "step": 720 }, { "epoch": 0.03, "learning_rate": 1.46e-06, "logits/chosen": -2.821424961090088, "logits/rejected": -2.836238384246826, "logps/chosen": -6.463691711425781, "logps/rejected": -123.7864761352539, "loss": 0.3104, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.25306111574172974, "rewards/margins": 1.1346869468688965, "rewards/rejected": -0.8816258311271667, "step": 730 }, { "epoch": 0.03, "learning_rate": 1.48e-06, "logits/chosen": -2.8041460514068604, "logits/rejected": -2.818166971206665, "logps/chosen": -4.807553768157959, "logps/rejected": -125.6934585571289, "loss": 0.2934, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.26797643303871155, "rewards/margins": 1.173134446144104, "rewards/rejected": -0.9051580429077148, "step": 740 }, { "epoch": 0.03, "learning_rate": 1.5e-06, "logits/chosen": -2.779296636581421, "logits/rejected": -2.79634690284729, "logps/chosen": -0.9039721488952637, "logps/rejected": -135.78916931152344, "loss": 0.2403, "rewards/accuracies": 1.0, "rewards/chosen": 0.3065469264984131, "rewards/margins": 1.311240315437317, "rewards/rejected": -1.0046932697296143, "step": 750 }, { "epoch": 0.03, "learning_rate": 1.52e-06, "logits/chosen": -2.8138394355773926, "logits/rejected": -2.826781749725342, "logps/chosen": -6.468068599700928, "logps/rejected": -132.6434326171875, "loss": 0.2904, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.2557832598686218, "rewards/margins": 1.223661184310913, "rewards/rejected": -0.9678778648376465, "step": 760 }, { "epoch": 0.03, "learning_rate": 1.54e-06, "logits/chosen": -2.8202385902404785, "logits/rejected": -2.8357481956481934, "logps/chosen": -3.149953842163086, "logps/rejected": -135.87928771972656, "loss": 0.2586, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2869695723056793, "rewards/margins": 1.2877401113510132, "rewards/rejected": -1.0007705688476562, "step": 770 }, { "epoch": 0.03, "learning_rate": 1.56e-06, "logits/chosen": -2.838630199432373, "logits/rejected": -2.849020481109619, "logps/chosen": -7.5944929122924805, "logps/rejected": -130.7025146484375, "loss": 0.3069, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.24056629836559296, "rewards/margins": 1.191831350326538, "rewards/rejected": -0.9512648582458496, "step": 780 }, { "epoch": 0.03, "learning_rate": 1.5800000000000001e-06, "logits/chosen": -2.7992000579833984, "logits/rejected": -2.8114240169525146, "logps/chosen": -4.532530307769775, "logps/rejected": -132.79415893554688, "loss": 0.2767, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2713344991207123, "rewards/margins": 1.2494734525680542, "rewards/rejected": -0.97813880443573, "step": 790 }, { "epoch": 0.03, "learning_rate": 1.6000000000000001e-06, "logits/chosen": -2.852445363998413, "logits/rejected": -2.8626627922058105, "logps/chosen": -3.5878841876983643, "logps/rejected": -135.88998413085938, "loss": 0.2646, "rewards/accuracies": 0.9375, "rewards/chosen": 0.28134867548942566, "rewards/margins": 1.284764289855957, "rewards/rejected": -1.003415584564209, "step": 800 }, { "epoch": 0.03, "eval_logits/chosen": -2.866072416305542, "eval_logits/rejected": -2.8745527267456055, "eval_logps/chosen": -19.902584075927734, "eval_logps/rejected": -119.71656799316406, "eval_loss": 0.43273869156837463, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.11757369339466095, "eval_rewards/margins": 0.9545836448669434, "eval_rewards/rejected": -0.8370100259780884, "eval_runtime": 2.5332, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.395, "step": 800 }, { "epoch": 0.03, "learning_rate": 1.6200000000000002e-06, "logits/chosen": -2.7932746410369873, "logits/rejected": -2.806813955307007, "logps/chosen": -6.693448543548584, "logps/rejected": -136.7832489013672, "loss": 0.2825, "rewards/accuracies": 0.9375, "rewards/chosen": 0.25298938155174255, "rewards/margins": 1.2659637928009033, "rewards/rejected": -1.0129742622375488, "step": 810 }, { "epoch": 0.03, "learning_rate": 1.6400000000000002e-06, "logits/chosen": -2.787155866622925, "logits/rejected": -2.8015549182891846, "logps/chosen": -4.373723983764648, "logps/rejected": -138.78652954101562, "loss": 0.2598, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2725609242916107, "rewards/margins": 1.3106110095977783, "rewards/rejected": -1.0380501747131348, "step": 820 }, { "epoch": 0.03, "learning_rate": 1.6600000000000002e-06, "logits/chosen": -2.8064827919006348, "logits/rejected": -2.819863796234131, "logps/chosen": -3.5665950775146484, "logps/rejected": -141.33102416992188, "loss": 0.2511, "rewards/accuracies": 0.9375, "rewards/chosen": 0.28050410747528076, "rewards/margins": 1.3387818336486816, "rewards/rejected": -1.0582777261734009, "step": 830 }, { "epoch": 0.03, "learning_rate": 1.6800000000000002e-06, "logits/chosen": -2.817418336868286, "logits/rejected": -2.8313846588134766, "logps/chosen": -2.252744197845459, "logps/rejected": -145.3087158203125, "loss": 0.2343, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2936623692512512, "rewards/margins": 1.3898221254348755, "rewards/rejected": -1.0961596965789795, "step": 840 }, { "epoch": 0.03, "learning_rate": 1.7000000000000002e-06, "logits/chosen": -2.8284406661987305, "logits/rejected": -2.840109348297119, "logps/chosen": -5.103249549865723, "logps/rejected": -142.70741271972656, "loss": 0.2625, "rewards/accuracies": 0.9375, "rewards/chosen": 0.26402419805526733, "rewards/margins": 1.338487982749939, "rewards/rejected": -1.0744638442993164, "step": 850 }, { "epoch": 0.03, "learning_rate": 1.72e-06, "logits/chosen": -2.813197612762451, "logits/rejected": -2.8260269165039062, "logps/chosen": -4.9756364822387695, "logps/rejected": -144.3242950439453, "loss": 0.2592, "rewards/accuracies": 0.9375, "rewards/chosen": 0.2666425108909607, "rewards/margins": 1.354537010192871, "rewards/rejected": -1.0878945589065552, "step": 860 }, { "epoch": 0.03, "learning_rate": 1.74e-06, "logits/chosen": -2.794604539871216, "logits/rejected": -2.8052635192871094, "logps/chosen": -4.487209320068359, "logps/rejected": -144.85519409179688, "loss": 0.2542, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.27251192927360535, "rewards/margins": 1.3654396533966064, "rewards/rejected": -1.0929278135299683, "step": 870 }, { "epoch": 0.04, "learning_rate": 1.76e-06, "logits/chosen": -2.8187928199768066, "logits/rejected": -2.832338571548462, "logps/chosen": -5.85897159576416, "logps/rejected": -144.75564575195312, "loss": 0.2659, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.25857943296432495, "rewards/margins": 1.3476403951644897, "rewards/rejected": -1.0890610218048096, "step": 880 }, { "epoch": 0.04, "learning_rate": 1.7800000000000001e-06, "logits/chosen": -2.831149101257324, "logits/rejected": -2.8425889015197754, "logps/chosen": -5.525818824768066, "logps/rejected": -144.9384307861328, "loss": 0.2612, "rewards/accuracies": 0.9375, "rewards/chosen": 0.2624892294406891, "rewards/margins": 1.3581664562225342, "rewards/rejected": -1.095677375793457, "step": 890 }, { "epoch": 0.04, "learning_rate": 1.8000000000000001e-06, "logits/chosen": -2.825331211090088, "logits/rejected": -2.8358824253082275, "logps/chosen": -2.9861559867858887, "logps/rejected": -147.10546875, "loss": 0.2358, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.28719478845596313, "rewards/margins": 1.4057575464248657, "rewards/rejected": -1.118562936782837, "step": 900 }, { "epoch": 0.04, "eval_logits/chosen": -2.871781587600708, "eval_logits/rejected": -2.879202127456665, "eval_logps/chosen": -20.95585823059082, "eval_logps/rejected": -129.0234832763672, "eval_loss": 0.4248077869415283, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.10704095661640167, "eval_rewards/margins": 1.0371202230453491, "eval_rewards/rejected": -0.9300791621208191, "eval_runtime": 2.5351, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 900 }, { "epoch": 0.04, "learning_rate": 1.8200000000000002e-06, "logits/chosen": -2.8098978996276855, "logits/rejected": -2.8206849098205566, "logps/chosen": -3.8079190254211426, "logps/rejected": -148.92869567871094, "loss": 0.2413, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2790614068508148, "rewards/margins": 1.412480115890503, "rewards/rejected": -1.1334187984466553, "step": 910 }, { "epoch": 0.04, "learning_rate": 1.8400000000000002e-06, "logits/chosen": -2.8188936710357666, "logits/rejected": -2.8311846256256104, "logps/chosen": -2.2249808311462402, "logps/rejected": -156.1125946044922, "loss": 0.2151, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.29501065611839294, "rewards/margins": 1.5023648738861084, "rewards/rejected": -1.2073543071746826, "step": 920 }, { "epoch": 0.04, "learning_rate": 1.8600000000000002e-06, "logits/chosen": -2.8503007888793945, "logits/rejected": -2.8604423999786377, "logps/chosen": -6.375435829162598, "logps/rejected": -152.13748168945312, "loss": 0.2563, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.25546616315841675, "rewards/margins": 1.4190584421157837, "rewards/rejected": -1.1635921001434326, "step": 930 }, { "epoch": 0.04, "learning_rate": 1.8800000000000002e-06, "logits/chosen": -2.831444263458252, "logits/rejected": -2.8406014442443848, "logps/chosen": -3.665541172027588, "logps/rejected": -154.6426544189453, "loss": 0.2281, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27952760457992554, "rewards/margins": 1.4757583141326904, "rewards/rejected": -1.1962306499481201, "step": 940 }, { "epoch": 0.04, "learning_rate": 1.9000000000000002e-06, "logits/chosen": -2.799205780029297, "logits/rejected": -2.8107001781463623, "logps/chosen": -4.6928791999816895, "logps/rejected": -150.57565307617188, "loss": 0.2476, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.2716704308986664, "rewards/margins": 1.417458415031433, "rewards/rejected": -1.1457879543304443, "step": 950 }, { "epoch": 0.04, "learning_rate": 1.9200000000000003e-06, "logits/chosen": -2.816527843475342, "logits/rejected": -2.8250370025634766, "logps/chosen": -6.3966569900512695, "logps/rejected": -149.07923889160156, "loss": 0.2621, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.25400084257125854, "rewards/margins": 1.388352632522583, "rewards/rejected": -1.1343519687652588, "step": 960 }, { "epoch": 0.04, "learning_rate": 1.94e-06, "logits/chosen": -2.7978243827819824, "logits/rejected": -2.8097643852233887, "logps/chosen": -2.814222812652588, "logps/rejected": -158.85586547851562, "loss": 0.2155, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28803014755249023, "rewards/margins": 1.5255552530288696, "rewards/rejected": -1.237525224685669, "step": 970 }, { "epoch": 0.04, "learning_rate": 1.9600000000000003e-06, "logits/chosen": -2.811749219894409, "logits/rejected": -2.8198513984680176, "logps/chosen": -9.303021430969238, "logps/rejected": -147.51974487304688, "loss": 0.2904, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.22490759193897247, "rewards/margins": 1.3438174724578857, "rewards/rejected": -1.1189100742340088, "step": 980 }, { "epoch": 0.04, "learning_rate": 1.98e-06, "logits/chosen": -2.8047471046447754, "logits/rejected": -2.814192295074463, "logps/chosen": -4.208864212036133, "logps/rejected": -158.22879028320312, "loss": 0.2274, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.27552667260169983, "rewards/margins": 1.4985954761505127, "rewards/rejected": -1.2230688333511353, "step": 990 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -2.8649916648864746, "logits/rejected": -2.873910665512085, "logps/chosen": -5.001629829406738, "logps/rejected": -153.1854248046875, "loss": 0.2427, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.26830264925956726, "rewards/margins": 1.4437980651855469, "rewards/rejected": -1.1754953861236572, "step": 1000 }, { "epoch": 0.04, "eval_logits/chosen": -2.8730030059814453, "eval_logits/rejected": -2.8785181045532227, "eval_logps/chosen": -20.163196563720703, "eval_logps/rejected": -136.72605895996094, "eval_loss": 0.40572553873062134, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.11496754735708237, "eval_rewards/margins": 1.1220725774765015, "eval_rewards/rejected": -1.0071051120758057, "eval_runtime": 2.5397, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 1000 }, { "epoch": 0.04, "learning_rate": 2.02e-06, "logits/chosen": -2.8269872665405273, "logits/rejected": -2.8337838649749756, "logps/chosen": -0.977016806602478, "logps/rejected": -164.6626739501953, "loss": 0.1891, "rewards/accuracies": 1.0, "rewards/chosen": 0.30668166279792786, "rewards/margins": 1.6010364294052124, "rewards/rejected": -1.294355034828186, "step": 1010 }, { "epoch": 0.04, "learning_rate": 2.04e-06, "logits/chosen": -2.814464569091797, "logits/rejected": -2.8253042697906494, "logps/chosen": -6.908135414123535, "logps/rejected": -154.9573974609375, "loss": 0.2573, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.24652886390686035, "rewards/margins": 1.4428061246871948, "rewards/rejected": -1.1962772607803345, "step": 1020 }, { "epoch": 0.04, "learning_rate": 2.06e-06, "logits/chosen": -2.7937171459198, "logits/rejected": -2.802722454071045, "logps/chosen": -4.200863838195801, "logps/rejected": -158.29115295410156, "loss": 0.2281, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2745644152164459, "rewards/margins": 1.4989993572235107, "rewards/rejected": -1.2244348526000977, "step": 1030 }, { "epoch": 0.04, "learning_rate": 2.08e-06, "logits/chosen": -2.8223493099212646, "logits/rejected": -2.8298580646514893, "logps/chosen": -1.7230730056762695, "logps/rejected": -161.9768524169922, "loss": 0.1995, "rewards/accuracies": 1.0, "rewards/chosen": 0.3010903596878052, "rewards/margins": 1.5687527656555176, "rewards/rejected": -1.2676622867584229, "step": 1040 }, { "epoch": 0.04, "learning_rate": 2.1000000000000002e-06, "logits/chosen": -2.799314260482788, "logits/rejected": -2.8088879585266113, "logps/chosen": -2.8070197105407715, "logps/rejected": -158.96058654785156, "loss": 0.2194, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28778737783432007, "rewards/margins": 1.521186113357544, "rewards/rejected": -1.2333987951278687, "step": 1050 }, { "epoch": 0.04, "learning_rate": 2.12e-06, "logits/chosen": -2.8217194080352783, "logits/rejected": -2.8327133655548096, "logps/chosen": -1.9483855962753296, "logps/rejected": -168.0953369140625, "loss": 0.1943, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2971464693546295, "rewards/margins": 1.6234709024429321, "rewards/rejected": -1.326324462890625, "step": 1060 }, { "epoch": 0.04, "learning_rate": 2.1400000000000003e-06, "logits/chosen": -2.806267738342285, "logits/rejected": -2.814929723739624, "logps/chosen": -5.836215019226074, "logps/rejected": -162.7094268798828, "loss": 0.2332, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.26132142543792725, "rewards/margins": 1.5314531326293945, "rewards/rejected": -1.2701314687728882, "step": 1070 }, { "epoch": 0.04, "learning_rate": 2.16e-06, "logits/chosen": -2.847303867340088, "logits/rejected": -2.8556580543518066, "logps/chosen": -7.644167900085449, "logps/rejected": -161.17544555664062, "loss": 0.2529, "rewards/accuracies": 0.9375, "rewards/chosen": 0.24166938662528992, "rewards/margins": 1.500256896018982, "rewards/rejected": -1.2585874795913696, "step": 1080 }, { "epoch": 0.04, "learning_rate": 2.1800000000000003e-06, "logits/chosen": -2.810529947280884, "logits/rejected": -2.8191683292388916, "logps/chosen": -3.9824860095977783, "logps/rejected": -165.3909912109375, "loss": 0.2164, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2763037085533142, "rewards/margins": 1.5771318674087524, "rewards/rejected": -1.300828218460083, "step": 1090 }, { "epoch": 0.04, "learning_rate": 2.2e-06, "logits/chosen": -2.8309614658355713, "logits/rejected": -2.8383357524871826, "logps/chosen": -4.318147659301758, "logps/rejected": -162.86227416992188, "loss": 0.2167, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27344492077827454, "rewards/margins": 1.5463900566101074, "rewards/rejected": -1.2729451656341553, "step": 1100 }, { "epoch": 0.04, "eval_logits/chosen": -2.875310182571411, "eval_logits/rejected": -2.880439043045044, "eval_logps/chosen": -19.828020095825195, "eval_logps/rejected": -144.75282287597656, "eval_loss": 0.3903087079524994, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.1183193176984787, "eval_rewards/margins": 1.205691933631897, "eval_rewards/rejected": -1.0873725414276123, "eval_runtime": 2.5356, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 1100 }, { "epoch": 0.04, "learning_rate": 2.2200000000000003e-06, "logits/chosen": -2.8003411293029785, "logits/rejected": -2.8085780143737793, "logps/chosen": -3.6789841651916504, "logps/rejected": -163.96282958984375, "loss": 0.2157, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27974388003349304, "rewards/margins": 1.5646860599517822, "rewards/rejected": -1.2849422693252563, "step": 1110 }, { "epoch": 0.04, "learning_rate": 2.24e-06, "logits/chosen": -2.8197388648986816, "logits/rejected": -2.8279731273651123, "logps/chosen": -3.9272987842559814, "logps/rejected": -167.36135864257812, "loss": 0.2132, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.27462905645370483, "rewards/margins": 1.5960724353790283, "rewards/rejected": -1.3214433193206787, "step": 1120 }, { "epoch": 0.05, "learning_rate": 2.2600000000000004e-06, "logits/chosen": -2.801616907119751, "logits/rejected": -2.810380458831787, "logps/chosen": -3.7108187675476074, "logps/rejected": -166.77774047851562, "loss": 0.2122, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28018099069595337, "rewards/margins": 1.5918943881988525, "rewards/rejected": -1.311713457107544, "step": 1130 }, { "epoch": 0.05, "learning_rate": 2.28e-06, "logits/chosen": -2.79058575630188, "logits/rejected": -2.802330493927002, "logps/chosen": -2.5894081592559814, "logps/rejected": -174.4743194580078, "loss": 0.187, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2885397672653198, "rewards/margins": 1.678278923034668, "rewards/rejected": -1.3897391557693481, "step": 1140 }, { "epoch": 0.05, "learning_rate": 2.3000000000000004e-06, "logits/chosen": -2.8417327404022217, "logits/rejected": -2.8483223915100098, "logps/chosen": -6.751503944396973, "logps/rejected": -169.75811767578125, "loss": 0.2266, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2523830831050873, "rewards/margins": 1.5950281620025635, "rewards/rejected": -1.3426451683044434, "step": 1150 }, { "epoch": 0.05, "learning_rate": 2.3200000000000002e-06, "logits/chosen": -2.8154919147491455, "logits/rejected": -2.8229076862335205, "logps/chosen": -7.562078952789307, "logps/rejected": -163.306396484375, "loss": 0.2479, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.24303407967090607, "rewards/margins": 1.5205808877944946, "rewards/rejected": -1.277547001838684, "step": 1160 }, { "epoch": 0.05, "learning_rate": 2.3400000000000005e-06, "logits/chosen": -2.833536148071289, "logits/rejected": -2.840592384338379, "logps/chosen": -3.0235328674316406, "logps/rejected": -173.162353515625, "loss": 0.1903, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28498825430870056, "rewards/margins": 1.667970895767212, "rewards/rejected": -1.382982611656189, "step": 1170 }, { "epoch": 0.05, "learning_rate": 2.3600000000000003e-06, "logits/chosen": -2.7976291179656982, "logits/rejected": -2.80814528465271, "logps/chosen": -3.7944846153259277, "logps/rejected": -173.11965942382812, "loss": 0.2023, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2781563103199005, "rewards/margins": 1.6540307998657227, "rewards/rejected": -1.3758742809295654, "step": 1180 }, { "epoch": 0.05, "learning_rate": 2.38e-06, "logits/chosen": -2.8287105560302734, "logits/rejected": -2.838430881500244, "logps/chosen": -0.7163937091827393, "logps/rejected": -181.09072875976562, "loss": 0.1623, "rewards/accuracies": 1.0, "rewards/chosen": 0.31108665466308594, "rewards/margins": 1.764979600906372, "rewards/rejected": -1.4538929462432861, "step": 1190 }, { "epoch": 0.05, "learning_rate": 2.4000000000000003e-06, "logits/chosen": -2.836683511734009, "logits/rejected": -2.842560291290283, "logps/chosen": -3.505491256713867, "logps/rejected": -177.05224609375, "loss": 0.1888, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2816828489303589, "rewards/margins": 1.6990468502044678, "rewards/rejected": -1.4173643589019775, "step": 1200 }, { "epoch": 0.05, "eval_logits/chosen": -2.8718607425689697, "eval_logits/rejected": -2.8783957958221436, "eval_logps/chosen": -20.804935455322266, "eval_logps/rejected": -151.0920867919922, "eval_loss": 0.39085134863853455, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.10855019092559814, "eval_rewards/margins": 1.2593154907226562, "eval_rewards/rejected": -1.1507651805877686, "eval_runtime": 2.5339, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 1200 }, { "epoch": 0.05, "learning_rate": 2.42e-06, "logits/chosen": -2.834646701812744, "logits/rejected": -2.8428475856781006, "logps/chosen": -2.933666706085205, "logps/rejected": -176.5664825439453, "loss": 0.1824, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2890808582305908, "rewards/margins": 1.6960302591323853, "rewards/rejected": -1.4069496393203735, "step": 1210 }, { "epoch": 0.05, "learning_rate": 2.4400000000000004e-06, "logits/chosen": -2.8467020988464355, "logits/rejected": -2.853048324584961, "logps/chosen": -4.124712944030762, "logps/rejected": -175.82562255859375, "loss": 0.1889, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2758190631866455, "rewards/margins": 1.6793323755264282, "rewards/rejected": -1.4035131931304932, "step": 1220 }, { "epoch": 0.05, "learning_rate": 2.46e-06, "logits/chosen": -2.849972724914551, "logits/rejected": -2.8560643196105957, "logps/chosen": -5.106858253479004, "logps/rejected": -174.467041015625, "loss": 0.2085, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.26454851031303406, "rewards/margins": 1.6560680866241455, "rewards/rejected": -1.391519546508789, "step": 1230 }, { "epoch": 0.05, "learning_rate": 2.4800000000000004e-06, "logits/chosen": -2.811800003051758, "logits/rejected": -2.821699619293213, "logps/chosen": -1.2178165912628174, "logps/rejected": -179.27908325195312, "loss": 0.1739, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.30335181951522827, "rewards/margins": 1.7421386241912842, "rewards/rejected": -1.4387871026992798, "step": 1240 }, { "epoch": 0.05, "learning_rate": 2.5e-06, "logits/chosen": -2.8383541107177734, "logits/rejected": -2.8474202156066895, "logps/chosen": -2.1533443927764893, "logps/rejected": -182.19259643554688, "loss": 0.1731, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.29590675234794617, "rewards/margins": 1.7589952945709229, "rewards/rejected": -1.4630887508392334, "step": 1250 }, { "epoch": 0.05, "learning_rate": 2.52e-06, "logits/chosen": -2.8423092365264893, "logits/rejected": -2.848371744155884, "logps/chosen": -3.675971508026123, "logps/rejected": -181.45184326171875, "loss": 0.1847, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2798725962638855, "rewards/margins": 1.7396968603134155, "rewards/rejected": -1.4598243236541748, "step": 1260 }, { "epoch": 0.05, "learning_rate": 2.5400000000000002e-06, "logits/chosen": -2.8068795204162598, "logits/rejected": -2.8155057430267334, "logps/chosen": -3.143465757369995, "logps/rejected": -184.03912353515625, "loss": 0.179, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2851407527923584, "rewards/margins": 1.7716739177703857, "rewards/rejected": -1.486533284187317, "step": 1270 }, { "epoch": 0.05, "learning_rate": 2.56e-06, "logits/chosen": -2.822482109069824, "logits/rejected": -2.828336477279663, "logps/chosen": -0.890036404132843, "logps/rejected": -184.98265075683594, "loss": 0.1588, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3080163598060608, "rewards/margins": 1.8033316135406494, "rewards/rejected": -1.495315432548523, "step": 1280 }, { "epoch": 0.05, "learning_rate": 2.5800000000000003e-06, "logits/chosen": -2.794123888015747, "logits/rejected": -2.8058462142944336, "logps/chosen": -2.8954508304595947, "logps/rejected": -185.87635803222656, "loss": 0.1738, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28937068581581116, "rewards/margins": 1.7910429239273071, "rewards/rejected": -1.5016721487045288, "step": 1290 }, { "epoch": 0.05, "learning_rate": 2.6e-06, "logits/chosen": -2.8029305934906006, "logits/rejected": -2.8146142959594727, "logps/chosen": -3.2052204608917236, "logps/rejected": -183.8682861328125, "loss": 0.17, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28479841351509094, "rewards/margins": 1.768410325050354, "rewards/rejected": -1.483611822128296, "step": 1300 }, { "epoch": 0.05, "eval_logits/chosen": -2.875568389892578, "eval_logits/rejected": -2.8826210498809814, "eval_logps/chosen": -23.400354385375977, "eval_logps/rejected": -157.95225524902344, "eval_loss": 0.40134286880493164, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.08259598910808563, "eval_rewards/margins": 1.3019627332687378, "eval_rewards/rejected": -1.2193667888641357, "eval_runtime": 2.548, "eval_samples_per_second": 1.962, "eval_steps_per_second": 0.392, "step": 1300 }, { "epoch": 0.05, "learning_rate": 2.6200000000000003e-06, "logits/chosen": -2.8116376399993896, "logits/rejected": -2.82365083694458, "logps/chosen": -0.33060967922210693, "logps/rejected": -191.24649047851562, "loss": 0.1464, "rewards/accuracies": 1.0, "rewards/chosen": 0.31692782044410706, "rewards/margins": 1.8729209899902344, "rewards/rejected": -1.5559930801391602, "step": 1310 }, { "epoch": 0.05, "learning_rate": 2.64e-06, "logits/chosen": -2.833270311355591, "logits/rejected": -2.8445465564727783, "logps/chosen": -4.937705993652344, "logps/rejected": -190.39535522460938, "loss": 0.1814, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2659699320793152, "rewards/margins": 1.8176586627960205, "rewards/rejected": -1.5516886711120605, "step": 1320 }, { "epoch": 0.05, "learning_rate": 2.6600000000000004e-06, "logits/chosen": -2.7963461875915527, "logits/rejected": -2.808882713317871, "logps/chosen": -0.5264667272567749, "logps/rejected": -192.98988342285156, "loss": 0.1434, "rewards/accuracies": 1.0, "rewards/chosen": 0.31050002574920654, "rewards/margins": 1.8907581567764282, "rewards/rejected": -1.5802582502365112, "step": 1330 }, { "epoch": 0.05, "learning_rate": 2.68e-06, "logits/chosen": -2.8271450996398926, "logits/rejected": -2.8352839946746826, "logps/chosen": -6.588844299316406, "logps/rejected": -185.74948120117188, "loss": 0.2065, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2515345811843872, "rewards/margins": 1.7547848224639893, "rewards/rejected": -1.5032503604888916, "step": 1340 }, { "epoch": 0.05, "learning_rate": 2.7000000000000004e-06, "logits/chosen": -2.8147852420806885, "logits/rejected": -2.821302652359009, "logps/chosen": -4.814528465270996, "logps/rejected": -184.24880981445312, "loss": 0.182, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2700776159763336, "rewards/margins": 1.754799485206604, "rewards/rejected": -1.4847218990325928, "step": 1350 }, { "epoch": 0.05, "learning_rate": 2.7200000000000002e-06, "logits/chosen": -2.7980477809906006, "logits/rejected": -2.8053336143493652, "logps/chosen": -10.145551681518555, "logps/rejected": -175.10057067871094, "loss": 0.2582, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.21596181392669678, "rewards/margins": 1.6128339767456055, "rewards/rejected": -1.3968720436096191, "step": 1360 }, { "epoch": 0.05, "learning_rate": 2.7400000000000004e-06, "logits/chosen": -2.8263187408447266, "logits/rejected": -2.837437152862549, "logps/chosen": -3.5470261573791504, "logps/rejected": -188.22207641601562, "loss": 0.179, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2830365300178528, "rewards/margins": 1.8115851879119873, "rewards/rejected": -1.5285487174987793, "step": 1370 }, { "epoch": 0.06, "learning_rate": 2.7600000000000003e-06, "logits/chosen": -2.837512493133545, "logits/rejected": -2.8449387550354004, "logps/chosen": -3.8321094512939453, "logps/rejected": -190.2117156982422, "loss": 0.176, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2791665196418762, "rewards/margins": 1.8271478414535522, "rewards/rejected": -1.5479815006256104, "step": 1380 }, { "epoch": 0.06, "learning_rate": 2.7800000000000005e-06, "logits/chosen": -2.8122668266296387, "logits/rejected": -2.8211870193481445, "logps/chosen": -3.035006284713745, "logps/rejected": -187.45263671875, "loss": 0.1705, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28410252928733826, "rewards/margins": 1.8064813613891602, "rewards/rejected": -1.522378921508789, "step": 1390 }, { "epoch": 0.06, "learning_rate": 2.8000000000000003e-06, "logits/chosen": -2.8264896869659424, "logits/rejected": -2.83465576171875, "logps/chosen": -0.7876213192939758, "logps/rejected": -198.2043914794922, "loss": 0.139, "rewards/accuracies": 1.0, "rewards/chosen": 0.3092033863067627, "rewards/margins": 1.9320440292358398, "rewards/rejected": -1.6228405237197876, "step": 1400 }, { "epoch": 0.06, "eval_logits/chosen": -2.8794522285461426, "eval_logits/rejected": -2.8859400749206543, "eval_logps/chosen": -20.621538162231445, "eval_logps/rejected": -163.32106018066406, "eval_loss": 0.3722394108772278, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.11038414388895035, "eval_rewards/margins": 1.3834389448165894, "eval_rewards/rejected": -1.2730547189712524, "eval_runtime": 2.5418, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 1400 }, { "epoch": 0.06, "learning_rate": 2.82e-06, "logits/chosen": -2.8250699043273926, "logits/rejected": -2.8292105197906494, "logps/chosen": -3.3454136848449707, "logps/rejected": -188.0550537109375, "loss": 0.1782, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28207311034202576, "rewards/margins": 1.8114128112792969, "rewards/rejected": -1.5293397903442383, "step": 1410 }, { "epoch": 0.06, "learning_rate": 2.84e-06, "logits/chosen": -2.8599843978881836, "logits/rejected": -2.8643951416015625, "logps/chosen": -1.7943531274795532, "logps/rejected": -195.1711883544922, "loss": 0.15, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29915890097618103, "rewards/margins": 1.8945789337158203, "rewards/rejected": -1.5954201221466064, "step": 1420 }, { "epoch": 0.06, "learning_rate": 2.86e-06, "logits/chosen": -2.8301732540130615, "logits/rejected": -2.8373732566833496, "logps/chosen": -4.135129451751709, "logps/rejected": -195.9446258544922, "loss": 0.1685, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2753157615661621, "rewards/margins": 1.8801805973052979, "rewards/rejected": -1.6048648357391357, "step": 1430 }, { "epoch": 0.06, "learning_rate": 2.88e-06, "logits/chosen": -2.8175315856933594, "logits/rejected": -2.8267691135406494, "logps/chosen": -2.8621408939361572, "logps/rejected": -192.6162567138672, "loss": 0.1625, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2872825562953949, "rewards/margins": 1.859283208847046, "rewards/rejected": -1.5720007419586182, "step": 1440 }, { "epoch": 0.06, "learning_rate": 2.9e-06, "logits/chosen": -2.8273637294769287, "logits/rejected": -2.8358919620513916, "logps/chosen": -0.3048400580883026, "logps/rejected": -202.20689392089844, "loss": 0.1307, "rewards/accuracies": 1.0, "rewards/chosen": 0.31350868940353394, "rewards/margins": 1.9808746576309204, "rewards/rejected": -1.6673657894134521, "step": 1450 }, { "epoch": 0.06, "learning_rate": 2.92e-06, "logits/chosen": -2.860515594482422, "logits/rejected": -2.866631269454956, "logps/chosen": -2.5549349784851074, "logps/rejected": -198.80551147460938, "loss": 0.152, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2920229732990265, "rewards/margins": 1.92401123046875, "rewards/rejected": -1.6319881677627563, "step": 1460 }, { "epoch": 0.06, "learning_rate": 2.9400000000000002e-06, "logits/chosen": -2.776981830596924, "logits/rejected": -2.791102409362793, "logps/chosen": -5.3455963134765625, "logps/rejected": -191.94699096679688, "loss": 0.1873, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2623938024044037, "rewards/margins": 1.8318376541137695, "rewards/rejected": -1.569443941116333, "step": 1470 }, { "epoch": 0.06, "learning_rate": 2.96e-06, "logits/chosen": -2.8262991905212402, "logits/rejected": -2.8339614868164062, "logps/chosen": -3.2720131874084473, "logps/rejected": -196.62173461914062, "loss": 0.1592, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.284067302942276, "rewards/margins": 1.8991466760635376, "rewards/rejected": -1.6150792837142944, "step": 1480 }, { "epoch": 0.06, "learning_rate": 2.9800000000000003e-06, "logits/chosen": -2.8241467475891113, "logits/rejected": -2.832089900970459, "logps/chosen": -1.0166598558425903, "logps/rejected": -200.533447265625, "loss": 0.1362, "rewards/accuracies": 1.0, "rewards/chosen": 0.3059515953063965, "rewards/margins": 1.9558141231536865, "rewards/rejected": -1.6498628854751587, "step": 1490 }, { "epoch": 0.06, "learning_rate": 3e-06, "logits/chosen": -2.8403725624084473, "logits/rejected": -2.8501338958740234, "logps/chosen": -2.9792163372039795, "logps/rejected": -202.36837768554688, "loss": 0.1525, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28829652070999146, "rewards/margins": 1.9555822610855103, "rewards/rejected": -1.667285680770874, "step": 1500 }, { "epoch": 0.06, "eval_logits/chosen": -2.8890914916992188, "eval_logits/rejected": -2.896177053451538, "eval_logps/chosen": -17.76885986328125, "eval_logps/rejected": -169.07681274414062, "eval_loss": 0.34497103095054626, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.1389109492301941, "eval_rewards/margins": 1.469523310661316, "eval_rewards/rejected": -1.330612301826477, "eval_runtime": 2.5395, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 1500 }, { "epoch": 0.06, "learning_rate": 3.0200000000000003e-06, "logits/chosen": -2.863537549972534, "logits/rejected": -2.870637893676758, "logps/chosen": -3.795405149459839, "logps/rejected": -201.4073486328125, "loss": 0.1591, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28018996119499207, "rewards/margins": 1.9394581317901611, "rewards/rejected": -1.6592683792114258, "step": 1510 }, { "epoch": 0.06, "learning_rate": 3.04e-06, "logits/chosen": -2.8756237030029297, "logits/rejected": -2.8831980228424072, "logps/chosen": -2.104443073272705, "logps/rejected": -202.64254760742188, "loss": 0.1473, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2981029152870178, "rewards/margins": 1.9653863906860352, "rewards/rejected": -1.6672834157943726, "step": 1520 }, { "epoch": 0.06, "learning_rate": 3.0600000000000003e-06, "logits/chosen": -2.8476932048797607, "logits/rejected": -2.8542919158935547, "logps/chosen": -4.021399974822998, "logps/rejected": -196.18971252441406, "loss": 0.1717, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2776428163051605, "rewards/margins": 1.8829669952392578, "rewards/rejected": -1.605324149131775, "step": 1530 }, { "epoch": 0.06, "learning_rate": 3.08e-06, "logits/chosen": -2.847120761871338, "logits/rejected": -2.855746030807495, "logps/chosen": -0.4984745383262634, "logps/rejected": -208.36026000976562, "loss": 0.1235, "rewards/accuracies": 1.0, "rewards/chosen": 0.3116949498653412, "rewards/margins": 2.0420479774475098, "rewards/rejected": -1.7303529977798462, "step": 1540 }, { "epoch": 0.06, "learning_rate": 3.1000000000000004e-06, "logits/chosen": -2.870846748352051, "logits/rejected": -2.875617504119873, "logps/chosen": -3.8666415214538574, "logps/rejected": -201.18795776367188, "loss": 0.1539, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2798447012901306, "rewards/margins": 1.935583472251892, "rewards/rejected": -1.6557388305664062, "step": 1550 }, { "epoch": 0.06, "learning_rate": 3.12e-06, "logits/chosen": -2.863752841949463, "logits/rejected": -2.870763063430786, "logps/chosen": -0.39195558428764343, "logps/rejected": -211.9021453857422, "loss": 0.1189, "rewards/accuracies": 1.0, "rewards/chosen": 0.3156086206436157, "rewards/margins": 2.0767180919647217, "rewards/rejected": -1.7611099481582642, "step": 1560 }, { "epoch": 0.06, "learning_rate": 3.1400000000000004e-06, "logits/chosen": -2.86383318901062, "logits/rejected": -2.871152877807617, "logps/chosen": -2.046583414077759, "logps/rejected": -207.2179718017578, "loss": 0.1362, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29753416776657104, "rewards/margins": 2.0144786834716797, "rewards/rejected": -1.7169443368911743, "step": 1570 }, { "epoch": 0.06, "learning_rate": 3.1600000000000002e-06, "logits/chosen": -2.8474373817443848, "logits/rejected": -2.854494333267212, "logps/chosen": -9.483784675598145, "logps/rejected": -200.8838348388672, "loss": 0.2023, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22285044193267822, "rewards/margins": 1.8738820552825928, "rewards/rejected": -1.6510318517684937, "step": 1580 }, { "epoch": 0.06, "learning_rate": 3.1800000000000005e-06, "logits/chosen": -2.8351259231567383, "logits/rejected": -2.840198040008545, "logps/chosen": -5.651516914367676, "logps/rejected": -206.65597534179688, "loss": 0.1605, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26093393564224243, "rewards/margins": 1.9744606018066406, "rewards/rejected": -1.713526725769043, "step": 1590 }, { "epoch": 0.06, "learning_rate": 3.2000000000000003e-06, "logits/chosen": -2.859328508377075, "logits/rejected": -2.86586332321167, "logps/chosen": -4.510318279266357, "logps/rejected": -201.73916625976562, "loss": 0.1622, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.27104508876800537, "rewards/margins": 1.9365192651748657, "rewards/rejected": -1.6654741764068604, "step": 1600 }, { "epoch": 0.06, "eval_logits/chosen": -2.899118185043335, "eval_logits/rejected": -2.9042744636535645, "eval_logps/chosen": -20.701135635375977, "eval_logps/rejected": -175.1273193359375, "eval_loss": 0.35563692450523376, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.10958818346261978, "eval_rewards/margins": 1.5007054805755615, "eval_rewards/rejected": -1.3911174535751343, "eval_runtime": 2.5521, "eval_samples_per_second": 1.959, "eval_steps_per_second": 0.392, "step": 1600 }, { "epoch": 0.06, "learning_rate": 3.2200000000000005e-06, "logits/chosen": -2.8798177242279053, "logits/rejected": -2.884556770324707, "logps/chosen": -2.367927074432373, "logps/rejected": -206.21316528320312, "loss": 0.1342, "rewards/accuracies": 1.0, "rewards/chosen": 0.29192954301834106, "rewards/margins": 2.000114917755127, "rewards/rejected": -1.7081854343414307, "step": 1610 }, { "epoch": 0.06, "learning_rate": 3.2400000000000003e-06, "logits/chosen": -2.8411731719970703, "logits/rejected": -2.8492112159729004, "logps/chosen": -3.0539586544036865, "logps/rejected": -205.5394287109375, "loss": 0.1455, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2874152958393097, "rewards/margins": 1.9886928796768188, "rewards/rejected": -1.701277732849121, "step": 1620 }, { "epoch": 0.07, "learning_rate": 3.2600000000000006e-06, "logits/chosen": -2.849393367767334, "logits/rejected": -2.8578455448150635, "logps/chosen": -4.8343610763549805, "logps/rejected": -207.52188110351562, "loss": 0.1655, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2691221833229065, "rewards/margins": 1.98992919921875, "rewards/rejected": -1.7208068370819092, "step": 1630 }, { "epoch": 0.07, "learning_rate": 3.2800000000000004e-06, "logits/chosen": -2.8483803272247314, "logits/rejected": -2.856879711151123, "logps/chosen": -6.139628887176514, "logps/rejected": -207.5807342529297, "loss": 0.1724, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25666120648384094, "rewards/margins": 1.9780679941177368, "rewards/rejected": -1.7214069366455078, "step": 1640 }, { "epoch": 0.07, "learning_rate": 3.3000000000000006e-06, "logits/chosen": -2.8512606620788574, "logits/rejected": -2.8609201908111572, "logps/chosen": -4.567070960998535, "logps/rejected": -205.49087524414062, "loss": 0.1642, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27059099078178406, "rewards/margins": 1.972476601600647, "rewards/rejected": -1.7018855810165405, "step": 1650 }, { "epoch": 0.07, "learning_rate": 3.3200000000000004e-06, "logits/chosen": -2.8613815307617188, "logits/rejected": -2.868727207183838, "logps/chosen": -0.9582642316818237, "logps/rejected": -214.0738067626953, "loss": 0.1192, "rewards/accuracies": 1.0, "rewards/chosen": 0.3060641586780548, "rewards/margins": 2.090634822845459, "rewards/rejected": -1.7845706939697266, "step": 1660 }, { "epoch": 0.07, "learning_rate": 3.3400000000000006e-06, "logits/chosen": -2.848097324371338, "logits/rejected": -2.854762554168701, "logps/chosen": -2.1159961223602295, "logps/rejected": -214.03121948242188, "loss": 0.1215, "rewards/accuracies": 1.0, "rewards/chosen": 0.29638728499412537, "rewards/margins": 2.081578493118286, "rewards/rejected": -1.785191297531128, "step": 1670 }, { "epoch": 0.07, "learning_rate": 3.3600000000000004e-06, "logits/chosen": -2.869337558746338, "logits/rejected": -2.875823974609375, "logps/chosen": -1.273223638534546, "logps/rejected": -211.5435791015625, "loss": 0.1288, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30279064178466797, "rewards/margins": 2.0637240409851074, "rewards/rejected": -1.7609336376190186, "step": 1680 }, { "epoch": 0.07, "learning_rate": 3.3800000000000007e-06, "logits/chosen": -2.858736515045166, "logits/rejected": -2.86680006980896, "logps/chosen": -0.8977063894271851, "logps/rejected": -214.53555297851562, "loss": 0.121, "rewards/accuracies": 1.0, "rewards/chosen": 0.3070111870765686, "rewards/margins": 2.0976758003234863, "rewards/rejected": -1.7906646728515625, "step": 1690 }, { "epoch": 0.07, "learning_rate": 3.4000000000000005e-06, "logits/chosen": -2.850975275039673, "logits/rejected": -2.859386920928955, "logps/chosen": -2.213099718093872, "logps/rejected": -215.11245727539062, "loss": 0.1242, "rewards/accuracies": 1.0, "rewards/chosen": 0.2970926761627197, "rewards/margins": 2.088529586791992, "rewards/rejected": -1.7914369106292725, "step": 1700 }, { "epoch": 0.07, "eval_logits/chosen": -2.9063706398010254, "eval_logits/rejected": -2.911102771759033, "eval_logps/chosen": -24.389156341552734, "eval_logps/rejected": -179.65121459960938, "eval_loss": 0.37798169255256653, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.07270797342061996, "eval_rewards/margins": 1.5090643167495728, "eval_rewards/rejected": -1.4363564252853394, "eval_runtime": 2.5371, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 1700 }, { "epoch": 0.07, "learning_rate": 3.4200000000000007e-06, "logits/chosen": -2.867418050765991, "logits/rejected": -2.8739476203918457, "logps/chosen": -3.0625133514404297, "logps/rejected": -211.71688842773438, "loss": 0.1419, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28410372138023376, "rewards/margins": 2.0507187843322754, "rewards/rejected": -1.7666149139404297, "step": 1710 }, { "epoch": 0.07, "learning_rate": 3.44e-06, "logits/chosen": -2.847611904144287, "logits/rejected": -2.855407476425171, "logps/chosen": -2.963460922241211, "logps/rejected": -207.51309204101562, "loss": 0.1482, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28835606575012207, "rewards/margins": 2.0079569816589355, "rewards/rejected": -1.7196009159088135, "step": 1720 }, { "epoch": 0.07, "learning_rate": 3.46e-06, "logits/chosen": -2.854668378829956, "logits/rejected": -2.862818956375122, "logps/chosen": -3.0509796142578125, "logps/rejected": -214.7109832763672, "loss": 0.1228, "rewards/accuracies": 1.0, "rewards/chosen": 0.28438299894332886, "rewards/margins": 2.0796380043029785, "rewards/rejected": -1.7952549457550049, "step": 1730 }, { "epoch": 0.07, "learning_rate": 3.48e-06, "logits/chosen": -2.8818135261535645, "logits/rejected": -2.8902316093444824, "logps/chosen": -2.682547092437744, "logps/rejected": -214.7310333251953, "loss": 0.1291, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28948450088500977, "rewards/margins": 2.0820248126983643, "rewards/rejected": -1.792540192604065, "step": 1740 }, { "epoch": 0.07, "learning_rate": 3.5e-06, "logits/chosen": -2.8280014991760254, "logits/rejected": -2.835878372192383, "logps/chosen": -1.4479225873947144, "logps/rejected": -213.6439666748047, "loss": 0.1271, "rewards/accuracies": 1.0, "rewards/chosen": 0.3007112145423889, "rewards/margins": 2.0829734802246094, "rewards/rejected": -1.7822622060775757, "step": 1750 }, { "epoch": 0.07, "learning_rate": 3.52e-06, "logits/chosen": -2.8295793533325195, "logits/rejected": -2.8357646465301514, "logps/chosen": -1.749804139137268, "logps/rejected": -217.343994140625, "loss": 0.1222, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30233973264694214, "rewards/margins": 2.1161789894104004, "rewards/rejected": -1.8138395547866821, "step": 1760 }, { "epoch": 0.07, "learning_rate": 3.54e-06, "logits/chosen": -2.8770627975463867, "logits/rejected": -2.8821632862091064, "logps/chosen": -7.243563175201416, "logps/rejected": -207.7464599609375, "loss": 0.1728, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2440401315689087, "rewards/margins": 1.9668314456939697, "rewards/rejected": -1.722791075706482, "step": 1770 }, { "epoch": 0.07, "learning_rate": 3.5600000000000002e-06, "logits/chosen": -2.8530025482177734, "logits/rejected": -2.85933256149292, "logps/chosen": -2.3582863807678223, "logps/rejected": -220.5322265625, "loss": 0.1153, "rewards/accuracies": 1.0, "rewards/chosen": 0.2938287854194641, "rewards/margins": 2.1419811248779297, "rewards/rejected": -1.8481525182724, "step": 1780 }, { "epoch": 0.07, "learning_rate": 3.58e-06, "logits/chosen": -2.8612074851989746, "logits/rejected": -2.866806745529175, "logps/chosen": -3.3916893005371094, "logps/rejected": -216.95230102539062, "loss": 0.1228, "rewards/accuracies": 1.0, "rewards/chosen": 0.2816562056541443, "rewards/margins": 2.100107192993164, "rewards/rejected": -1.818450689315796, "step": 1790 }, { "epoch": 0.07, "learning_rate": 3.6000000000000003e-06, "logits/chosen": -2.8782477378845215, "logits/rejected": -2.884580135345459, "logps/chosen": -2.2281510829925537, "logps/rejected": -220.4797821044922, "loss": 0.1232, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2938682436943054, "rewards/margins": 2.1457133293151855, "rewards/rejected": -1.8518450260162354, "step": 1800 }, { "epoch": 0.07, "eval_logits/chosen": -2.9152421951293945, "eval_logits/rejected": -2.9201042652130127, "eval_logps/chosen": -23.91687774658203, "eval_logps/rejected": -184.58616638183594, "eval_loss": 0.3636830747127533, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.07743076235055923, "eval_rewards/margins": 1.5631368160247803, "eval_rewards/rejected": -1.485706090927124, "eval_runtime": 2.5418, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 1800 }, { "epoch": 0.07, "learning_rate": 3.62e-06, "logits/chosen": -2.8677568435668945, "logits/rejected": -2.8761191368103027, "logps/chosen": -1.290468454360962, "logps/rejected": -222.1938018798828, "loss": 0.1112, "rewards/accuracies": 1.0, "rewards/chosen": 0.3032197058200836, "rewards/margins": 2.172358751296997, "rewards/rejected": -1.8691389560699463, "step": 1810 }, { "epoch": 0.07, "learning_rate": 3.6400000000000003e-06, "logits/chosen": -2.8662495613098145, "logits/rejected": -2.8751721382141113, "logps/chosen": -3.802825927734375, "logps/rejected": -219.2920684814453, "loss": 0.1339, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2778162956237793, "rewards/margins": 2.119009017944336, "rewards/rejected": -1.841192603111267, "step": 1820 }, { "epoch": 0.07, "learning_rate": 3.66e-06, "logits/chosen": -2.9144134521484375, "logits/rejected": -2.9220311641693115, "logps/chosen": -2.315789222717285, "logps/rejected": -222.5942840576172, "loss": 0.1201, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2950668931007385, "rewards/margins": 2.1636054515838623, "rewards/rejected": -1.8685386180877686, "step": 1830 }, { "epoch": 0.07, "learning_rate": 3.6800000000000003e-06, "logits/chosen": -2.853952169418335, "logits/rejected": -2.8633639812469482, "logps/chosen": -3.7141947746276855, "logps/rejected": -222.67080688476562, "loss": 0.1361, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28083768486976624, "rewards/margins": 2.1522507667541504, "rewards/rejected": -1.871413230895996, "step": 1840 }, { "epoch": 0.07, "learning_rate": 3.7e-06, "logits/chosen": -2.8829073905944824, "logits/rejected": -2.891287326812744, "logps/chosen": -1.0284494161605835, "logps/rejected": -220.7974395751953, "loss": 0.1154, "rewards/accuracies": 1.0, "rewards/chosen": 0.30580803751945496, "rewards/margins": 2.160071849822998, "rewards/rejected": -1.8542636632919312, "step": 1850 }, { "epoch": 0.07, "learning_rate": 3.7200000000000004e-06, "logits/chosen": -2.8927836418151855, "logits/rejected": -2.902541399002075, "logps/chosen": -2.880946397781372, "logps/rejected": -227.48095703125, "loss": 0.1087, "rewards/accuracies": 1.0, "rewards/chosen": 0.2911922335624695, "rewards/margins": 2.2060909271240234, "rewards/rejected": -1.9148986339569092, "step": 1860 }, { "epoch": 0.07, "learning_rate": 3.74e-06, "logits/chosen": -2.8417210578918457, "logits/rejected": -2.853752374649048, "logps/chosen": -3.2475948333740234, "logps/rejected": -221.7072296142578, "loss": 0.1323, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2817545533180237, "rewards/margins": 2.1481945514678955, "rewards/rejected": -1.8664400577545166, "step": 1870 }, { "epoch": 0.08, "learning_rate": 3.7600000000000004e-06, "logits/chosen": -2.8565447330474854, "logits/rejected": -2.86478590965271, "logps/chosen": -1.2450348138809204, "logps/rejected": -222.84878540039062, "loss": 0.1123, "rewards/accuracies": 1.0, "rewards/chosen": 0.30260351300239563, "rewards/margins": 2.174870014190674, "rewards/rejected": -1.872266411781311, "step": 1880 }, { "epoch": 0.08, "learning_rate": 3.7800000000000002e-06, "logits/chosen": -2.8765995502471924, "logits/rejected": -2.883763551712036, "logps/chosen": -9.237138748168945, "logps/rejected": -225.90011596679688, "loss": 0.1503, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2266441136598587, "rewards/margins": 2.1274874210357666, "rewards/rejected": -1.9008432626724243, "step": 1890 }, { "epoch": 0.08, "learning_rate": 3.8000000000000005e-06, "logits/chosen": -2.8834590911865234, "logits/rejected": -2.890244722366333, "logps/chosen": -3.6419155597686768, "logps/rejected": -225.4192352294922, "loss": 0.1141, "rewards/accuracies": 1.0, "rewards/chosen": 0.2809681296348572, "rewards/margins": 2.1792688369750977, "rewards/rejected": -1.8983008861541748, "step": 1900 }, { "epoch": 0.08, "eval_logits/chosen": -2.9199979305267334, "eval_logits/rejected": -2.9257216453552246, "eval_logps/chosen": -21.39789390563965, "eval_logps/rejected": -192.25894165039062, "eval_loss": 0.32317715883255005, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.10262060165405273, "eval_rewards/margins": 1.6650543212890625, "eval_rewards/rejected": -1.5624336004257202, "eval_runtime": 2.5341, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 1900 }, { "epoch": 0.08, "learning_rate": 3.820000000000001e-06, "logits/chosen": -2.894005060195923, "logits/rejected": -2.9009058475494385, "logps/chosen": -1.4935048818588257, "logps/rejected": -231.05996704101562, "loss": 0.1007, "rewards/accuracies": 1.0, "rewards/chosen": 0.30105850100517273, "rewards/margins": 2.2583539485931396, "rewards/rejected": -1.9572956562042236, "step": 1910 }, { "epoch": 0.08, "learning_rate": 3.8400000000000005e-06, "logits/chosen": -2.881826162338257, "logits/rejected": -2.8897719383239746, "logps/chosen": -1.5685993432998657, "logps/rejected": -229.76339721679688, "loss": 0.1029, "rewards/accuracies": 1.0, "rewards/chosen": 0.29886841773986816, "rewards/margins": 2.2463154792785645, "rewards/rejected": -1.9474470615386963, "step": 1920 }, { "epoch": 0.08, "learning_rate": 3.86e-06, "logits/chosen": -2.876983404159546, "logits/rejected": -2.884234666824341, "logps/chosen": -7.178792476654053, "logps/rejected": -226.2073211669922, "loss": 0.142, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24563045799732208, "rewards/margins": 2.1572327613830566, "rewards/rejected": -1.9116026163101196, "step": 1930 }, { "epoch": 0.08, "learning_rate": 3.88e-06, "logits/chosen": -2.882366180419922, "logits/rejected": -2.8899776935577393, "logps/chosen": -4.249680042266846, "logps/rejected": -227.25473022460938, "loss": 0.1309, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27726733684539795, "rewards/margins": 2.190783977508545, "rewards/rejected": -1.9135169982910156, "step": 1940 }, { "epoch": 0.08, "learning_rate": 3.900000000000001e-06, "logits/chosen": -2.8690695762634277, "logits/rejected": -2.87953519821167, "logps/chosen": -3.0256283283233643, "logps/rejected": -226.8081512451172, "loss": 0.1235, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28469210863113403, "rewards/margins": 2.2018849849700928, "rewards/rejected": -1.917192816734314, "step": 1950 }, { "epoch": 0.08, "learning_rate": 3.920000000000001e-06, "logits/chosen": -2.888272523880005, "logits/rejected": -2.8984262943267822, "logps/chosen": -1.3259893655776978, "logps/rejected": -232.4541778564453, "loss": 0.1004, "rewards/accuracies": 1.0, "rewards/chosen": 0.30396440625190735, "rewards/margins": 2.276197910308838, "rewards/rejected": -1.972233772277832, "step": 1960 }, { "epoch": 0.08, "learning_rate": 3.94e-06, "logits/chosen": -2.888093948364258, "logits/rejected": -2.8968138694763184, "logps/chosen": -4.289365768432617, "logps/rejected": -230.21377563476562, "loss": 0.1349, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27365022897720337, "rewards/margins": 2.2241361141204834, "rewards/rejected": -1.9504858255386353, "step": 1970 }, { "epoch": 0.08, "learning_rate": 3.96e-06, "logits/chosen": -2.8857922554016113, "logits/rejected": -2.89497447013855, "logps/chosen": -11.628125190734863, "logps/rejected": -225.2532196044922, "loss": 0.2027, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20207861065864563, "rewards/margins": 2.0981321334838867, "rewards/rejected": -1.8960535526275635, "step": 1980 }, { "epoch": 0.08, "learning_rate": 3.980000000000001e-06, "logits/chosen": -2.8838119506835938, "logits/rejected": -2.8925416469573975, "logps/chosen": -3.7772343158721924, "logps/rejected": -232.6337890625, "loss": 0.1256, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2811170518398285, "rewards/margins": 2.2492101192474365, "rewards/rejected": -1.968092918395996, "step": 1990 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -2.880845785140991, "logits/rejected": -2.8873775005340576, "logps/chosen": -7.36428165435791, "logps/rejected": -228.8857879638672, "loss": 0.1525, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24489319324493408, "rewards/margins": 2.1794159412384033, "rewards/rejected": -1.9345228672027588, "step": 2000 }, { "epoch": 0.08, "eval_logits/chosen": -2.9304699897766113, "eval_logits/rejected": -2.935450792312622, "eval_logps/chosen": -22.478303909301758, "eval_logps/rejected": -200.29678344726562, "eval_loss": 0.3076796233654022, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.0918165072798729, "eval_rewards/margins": 1.734628677368164, "eval_rewards/rejected": -1.6428120136260986, "eval_runtime": 2.5479, "eval_samples_per_second": 1.962, "eval_steps_per_second": 0.392, "step": 2000 }, { "epoch": 0.08, "learning_rate": 4.0200000000000005e-06, "logits/chosen": -2.880174160003662, "logits/rejected": -2.8894295692443848, "logps/chosen": -2.799675464630127, "logps/rejected": -234.4752960205078, "loss": 0.1033, "rewards/accuracies": 1.0, "rewards/chosen": 0.2883533239364624, "rewards/margins": 2.279087543487549, "rewards/rejected": -1.9907344579696655, "step": 2010 }, { "epoch": 0.08, "learning_rate": 4.04e-06, "logits/chosen": -2.8881449699401855, "logits/rejected": -2.8955368995666504, "logps/chosen": -5.0290327072143555, "logps/rejected": -226.7039031982422, "loss": 0.1394, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26569539308547974, "rewards/margins": 2.1781840324401855, "rewards/rejected": -1.912488341331482, "step": 2020 }, { "epoch": 0.08, "learning_rate": 4.060000000000001e-06, "logits/chosen": -2.8666679859161377, "logits/rejected": -2.876887798309326, "logps/chosen": -1.3502347469329834, "logps/rejected": -235.25448608398438, "loss": 0.0988, "rewards/accuracies": 1.0, "rewards/chosen": 0.3035372197628021, "rewards/margins": 2.302825927734375, "rewards/rejected": -1.999288558959961, "step": 2030 }, { "epoch": 0.08, "learning_rate": 4.08e-06, "logits/chosen": -2.867825508117676, "logits/rejected": -2.879314422607422, "logps/chosen": -3.9436841011047363, "logps/rejected": -236.670654296875, "loss": 0.1184, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2758646309375763, "rewards/margins": 2.2873973846435547, "rewards/rejected": -2.0115325450897217, "step": 2040 }, { "epoch": 0.08, "learning_rate": 4.1e-06, "logits/chosen": -2.87078595161438, "logits/rejected": -2.8817501068115234, "logps/chosen": -3.4344470500946045, "logps/rejected": -233.98092651367188, "loss": 0.1239, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2821028530597687, "rewards/margins": 2.2659332752227783, "rewards/rejected": -1.9838306903839111, "step": 2050 }, { "epoch": 0.08, "learning_rate": 4.12e-06, "logits/chosen": -2.849569797515869, "logits/rejected": -2.8600449562072754, "logps/chosen": -3.570810317993164, "logps/rejected": -238.5326385498047, "loss": 0.1179, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28117647767066956, "rewards/margins": 2.3152737617492676, "rewards/rejected": -2.0340969562530518, "step": 2060 }, { "epoch": 0.08, "learning_rate": 4.14e-06, "logits/chosen": -2.9009947776794434, "logits/rejected": -2.906818151473999, "logps/chosen": -1.116943120956421, "logps/rejected": -240.27908325195312, "loss": 0.093, "rewards/accuracies": 1.0, "rewards/chosen": 0.30614161491394043, "rewards/margins": 2.3509178161621094, "rewards/rejected": -2.044776439666748, "step": 2070 }, { "epoch": 0.08, "learning_rate": 4.16e-06, "logits/chosen": -2.902418851852417, "logits/rejected": -2.908914089202881, "logps/chosen": -4.712584018707275, "logps/rejected": -234.48764038085938, "loss": 0.1217, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27223798632621765, "rewards/margins": 2.2589657306671143, "rewards/rejected": -1.9867279529571533, "step": 2080 }, { "epoch": 0.08, "learning_rate": 4.18e-06, "logits/chosen": -2.9091451168060303, "logits/rejected": -2.914649486541748, "logps/chosen": -3.6657042503356934, "logps/rejected": -240.7332000732422, "loss": 0.1186, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2821826636791229, "rewards/margins": 2.331425905227661, "rewards/rejected": -2.0492427349090576, "step": 2090 }, { "epoch": 0.08, "learning_rate": 4.2000000000000004e-06, "logits/chosen": -2.8996682167053223, "logits/rejected": -2.9054317474365234, "logps/chosen": -3.6533889770507812, "logps/rejected": -236.3352508544922, "loss": 0.1236, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2790566086769104, "rewards/margins": 2.290548801422119, "rewards/rejected": -2.0114920139312744, "step": 2100 }, { "epoch": 0.08, "eval_logits/chosen": -2.9440362453460693, "eval_logits/rejected": -2.9431889057159424, "eval_logps/chosen": -31.002208709716797, "eval_logps/rejected": -201.67376708984375, "eval_loss": 0.40238356590270996, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.006577420048415661, "eval_rewards/margins": 1.6631596088409424, "eval_rewards/rejected": -1.6565821170806885, "eval_runtime": 2.5403, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 2100 }, { "epoch": 0.08, "learning_rate": 4.22e-06, "logits/chosen": -2.9114327430725098, "logits/rejected": -2.9168343544006348, "logps/chosen": -4.802510738372803, "logps/rejected": -236.4150390625, "loss": 0.1336, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2674771845340729, "rewards/margins": 2.2790274620056152, "rewards/rejected": -2.011550188064575, "step": 2110 }, { "epoch": 0.08, "learning_rate": 4.24e-06, "logits/chosen": -2.9085607528686523, "logits/rejected": -2.9127986431121826, "logps/chosen": -4.17403507232666, "logps/rejected": -230.22866821289062, "loss": 0.1402, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2755618393421173, "rewards/margins": 2.2190515995025635, "rewards/rejected": -1.943489670753479, "step": 2120 }, { "epoch": 0.09, "learning_rate": 4.26e-06, "logits/chosen": -2.9107890129089355, "logits/rejected": -2.9154105186462402, "logps/chosen": -3.96148419380188, "logps/rejected": -230.8666229248047, "loss": 0.1344, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2768270969390869, "rewards/margins": 2.229801893234253, "rewards/rejected": -1.9529749155044556, "step": 2130 }, { "epoch": 0.09, "learning_rate": 4.2800000000000005e-06, "logits/chosen": -2.9179463386535645, "logits/rejected": -2.928199291229248, "logps/chosen": -1.8572899103164673, "logps/rejected": -239.9588165283203, "loss": 0.0963, "rewards/accuracies": 1.0, "rewards/chosen": 0.2992614805698395, "rewards/margins": 2.3422892093658447, "rewards/rejected": -2.043027639389038, "step": 2140 }, { "epoch": 0.09, "learning_rate": 4.3e-06, "logits/chosen": -2.855104684829712, "logits/rejected": -2.873234272003174, "logps/chosen": -0.35500583052635193, "logps/rejected": -245.635498046875, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": 0.3124508857727051, "rewards/margins": 2.413022994995117, "rewards/rejected": -2.100572109222412, "step": 2150 }, { "epoch": 0.09, "learning_rate": 4.32e-06, "logits/chosen": -2.898282051086426, "logits/rejected": -2.912907361984253, "logps/chosen": -1.9608014822006226, "logps/rejected": -241.63613891601562, "loss": 0.1083, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2975894510746002, "rewards/margins": 2.3602213859558105, "rewards/rejected": -2.0626323223114014, "step": 2160 }, { "epoch": 0.09, "learning_rate": 4.34e-06, "logits/chosen": -2.858966827392578, "logits/rejected": -2.871777057647705, "logps/chosen": -8.59228229522705, "logps/rejected": -229.00942993164062, "loss": 0.1846, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2308976948261261, "rewards/margins": 2.1687228679656982, "rewards/rejected": -1.9378252029418945, "step": 2170 }, { "epoch": 0.09, "learning_rate": 4.360000000000001e-06, "logits/chosen": -2.8531863689422607, "logits/rejected": -2.8695454597473145, "logps/chosen": -0.7690185308456421, "logps/rejected": -245.15829467773438, "loss": 0.0954, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3097197413444519, "rewards/margins": 2.404114246368408, "rewards/rejected": -2.0943944454193115, "step": 2180 }, { "epoch": 0.09, "learning_rate": 4.38e-06, "logits/chosen": -2.917773962020874, "logits/rejected": -2.92891001701355, "logps/chosen": -0.9318639636039734, "logps/rejected": -243.00784301757812, "loss": 0.0973, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30899152159690857, "rewards/margins": 2.3817927837371826, "rewards/rejected": -2.072801113128662, "step": 2190 }, { "epoch": 0.09, "learning_rate": 4.4e-06, "logits/chosen": -2.910006046295166, "logits/rejected": -2.922790765762329, "logps/chosen": -6.459696292877197, "logps/rejected": -244.05142211914062, "loss": 0.1467, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.25305670499801636, "rewards/margins": 2.3401906490325928, "rewards/rejected": -2.0871341228485107, "step": 2200 }, { "epoch": 0.09, "eval_logits/chosen": -2.942652463912964, "eval_logits/rejected": -2.9484615325927734, "eval_logps/chosen": -24.46652603149414, "eval_logps/rejected": -209.4658966064453, "eval_loss": 0.32989227771759033, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.0719342976808548, "eval_rewards/margins": 1.8064377307891846, "eval_rewards/rejected": -1.734503149986267, "eval_runtime": 2.5424, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 2200 }, { "epoch": 0.09, "learning_rate": 4.42e-06, "logits/chosen": -2.8855478763580322, "logits/rejected": -2.899013042449951, "logps/chosen": -0.6904338598251343, "logps/rejected": -244.23190307617188, "loss": 0.0933, "rewards/accuracies": 1.0, "rewards/chosen": 0.31311023235321045, "rewards/margins": 2.3980441093444824, "rewards/rejected": -2.0849339962005615, "step": 2210 }, { "epoch": 0.09, "learning_rate": 4.440000000000001e-06, "logits/chosen": -2.914344072341919, "logits/rejected": -2.9275410175323486, "logps/chosen": -0.5179017782211304, "logps/rejected": -250.8892059326172, "loss": 0.0834, "rewards/accuracies": 1.0, "rewards/chosen": 0.31062552332878113, "rewards/margins": 2.465390205383301, "rewards/rejected": -2.1547646522521973, "step": 2220 }, { "epoch": 0.09, "learning_rate": 4.4600000000000005e-06, "logits/chosen": -2.879368543624878, "logits/rejected": -2.8904190063476562, "logps/chosen": -1.1397054195404053, "logps/rejected": -242.33224487304688, "loss": 0.1014, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3038514256477356, "rewards/margins": 2.373854875564575, "rewards/rejected": -2.070003032684326, "step": 2230 }, { "epoch": 0.09, "learning_rate": 4.48e-06, "logits/chosen": -2.9214510917663574, "logits/rejected": -2.929487466812134, "logps/chosen": -3.5173182487487793, "logps/rejected": -248.56405639648438, "loss": 0.114, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28234440088272095, "rewards/margins": 2.41058349609375, "rewards/rejected": -2.128239154815674, "step": 2240 }, { "epoch": 0.09, "learning_rate": 4.5e-06, "logits/chosen": -2.8900792598724365, "logits/rejected": -2.900667428970337, "logps/chosen": -3.028165578842163, "logps/rejected": -250.20932006835938, "loss": 0.0948, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2885078191757202, "rewards/margins": 2.434756278991699, "rewards/rejected": -2.1462483406066895, "step": 2250 }, { "epoch": 0.09, "learning_rate": 4.520000000000001e-06, "logits/chosen": -2.8774497509002686, "logits/rejected": -2.8897578716278076, "logps/chosen": -2.6005990505218506, "logps/rejected": -250.6619110107422, "loss": 0.1048, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2898808419704437, "rewards/margins": 2.4438042640686035, "rewards/rejected": -2.153923511505127, "step": 2260 }, { "epoch": 0.09, "learning_rate": 4.540000000000001e-06, "logits/chosen": -2.898271083831787, "logits/rejected": -2.9105048179626465, "logps/chosen": -2.155888080596924, "logps/rejected": -251.6062469482422, "loss": 0.0989, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.295857310295105, "rewards/margins": 2.4556992053985596, "rewards/rejected": -2.159841775894165, "step": 2270 }, { "epoch": 0.09, "learning_rate": 4.56e-06, "logits/chosen": -2.9002315998077393, "logits/rejected": -2.9119772911071777, "logps/chosen": -0.30956435203552246, "logps/rejected": -256.9794921875, "loss": 0.078, "rewards/accuracies": 1.0, "rewards/chosen": 0.31390053033828735, "rewards/margins": 2.527012348175049, "rewards/rejected": -2.2131118774414062, "step": 2280 }, { "epoch": 0.09, "learning_rate": 4.58e-06, "logits/chosen": -2.901566743850708, "logits/rejected": -2.9124112129211426, "logps/chosen": -3.679454803466797, "logps/rejected": -252.70150756835938, "loss": 0.1119, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2824423015117645, "rewards/margins": 2.452200412750244, "rewards/rejected": -2.1697583198547363, "step": 2290 }, { "epoch": 0.09, "learning_rate": 4.600000000000001e-06, "logits/chosen": -2.9068350791931152, "logits/rejected": -2.9200117588043213, "logps/chosen": -6.174842357635498, "logps/rejected": -248.494873046875, "loss": 0.1314, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.25505223870277405, "rewards/margins": 2.3836076259613037, "rewards/rejected": -2.1285555362701416, "step": 2300 }, { "epoch": 0.09, "eval_logits/chosen": -2.9549360275268555, "eval_logits/rejected": -2.960585594177246, "eval_logps/chosen": -27.424602508544922, "eval_logps/rejected": -216.0565643310547, "eval_loss": 0.3408864736557007, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.04235351085662842, "eval_rewards/margins": 1.8427636623382568, "eval_rewards/rejected": -1.8004100322723389, "eval_runtime": 2.5369, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 2300 }, { "epoch": 0.09, "learning_rate": 4.620000000000001e-06, "logits/chosen": -2.9099338054656982, "logits/rejected": -2.923081159591675, "logps/chosen": -0.4179440438747406, "logps/rejected": -259.6936950683594, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": 0.31431493163108826, "rewards/margins": 2.5529282093048096, "rewards/rejected": -2.2386131286621094, "step": 2310 }, { "epoch": 0.09, "learning_rate": 4.6400000000000005e-06, "logits/chosen": -2.9152426719665527, "logits/rejected": -2.9273438453674316, "logps/chosen": -0.7640389204025269, "logps/rejected": -251.2838897705078, "loss": 0.0853, "rewards/accuracies": 1.0, "rewards/chosen": 0.3088214099407196, "rewards/margins": 2.4645071029663086, "rewards/rejected": -2.1556856632232666, "step": 2320 }, { "epoch": 0.09, "learning_rate": 4.66e-06, "logits/chosen": -2.894893169403076, "logits/rejected": -2.9052560329437256, "logps/chosen": -4.123711585998535, "logps/rejected": -254.2692108154297, "loss": 0.1113, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2774103283882141, "rewards/margins": 2.46560001373291, "rewards/rejected": -2.188189744949341, "step": 2330 }, { "epoch": 0.09, "learning_rate": 4.680000000000001e-06, "logits/chosen": -2.907576084136963, "logits/rejected": -2.919492483139038, "logps/chosen": -3.2175357341766357, "logps/rejected": -257.1834411621094, "loss": 0.1057, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2856447994709015, "rewards/margins": 2.5024991035461426, "rewards/rejected": -2.2168545722961426, "step": 2340 }, { "epoch": 0.09, "learning_rate": 4.7e-06, "logits/chosen": -2.900947093963623, "logits/rejected": -2.913073778152466, "logps/chosen": -6.7078118324279785, "logps/rejected": -250.6683807373047, "loss": 0.1393, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2506968379020691, "rewards/margins": 2.4076366424560547, "rewards/rejected": -2.156939744949341, "step": 2350 }, { "epoch": 0.09, "learning_rate": 4.7200000000000005e-06, "logits/chosen": -2.89741849899292, "logits/rejected": -2.9070355892181396, "logps/chosen": -9.704840660095215, "logps/rejected": -250.95474243164062, "loss": 0.1702, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22177815437316895, "rewards/margins": 2.3750011920928955, "rewards/rejected": -2.1532225608825684, "step": 2360 }, { "epoch": 0.09, "learning_rate": 4.74e-06, "logits/chosen": -2.9074714183807373, "logits/rejected": -2.9198410511016846, "logps/chosen": -3.5288662910461426, "logps/rejected": -256.94854736328125, "loss": 0.1086, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2817321717739105, "rewards/margins": 2.4986720085144043, "rewards/rejected": -2.216939926147461, "step": 2370 }, { "epoch": 0.1, "learning_rate": 4.76e-06, "logits/chosen": -2.875518321990967, "logits/rejected": -2.8905136585235596, "logps/chosen": -0.26971444487571716, "logps/rejected": -259.9581298828125, "loss": 0.0766, "rewards/accuracies": 1.0, "rewards/chosen": 0.31088802218437195, "rewards/margins": 2.554607629776001, "rewards/rejected": -2.2437195777893066, "step": 2380 }, { "epoch": 0.1, "learning_rate": 4.78e-06, "logits/chosen": -2.9354164600372314, "logits/rejected": -2.9443180561065674, "logps/chosen": -0.9426226615905762, "logps/rejected": -259.5198669433594, "loss": 0.0778, "rewards/accuracies": 1.0, "rewards/chosen": 0.3081773817539215, "rewards/margins": 2.547635078430176, "rewards/rejected": -2.239457607269287, "step": 2390 }, { "epoch": 0.1, "learning_rate": 4.800000000000001e-06, "logits/chosen": -2.933061361312866, "logits/rejected": -2.944437026977539, "logps/chosen": -0.9381445646286011, "logps/rejected": -258.87408447265625, "loss": 0.0827, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3058001399040222, "rewards/margins": 2.5452492237091064, "rewards/rejected": -2.2394492626190186, "step": 2400 }, { "epoch": 0.1, "eval_logits/chosen": -2.9642539024353027, "eval_logits/rejected": -2.9690146446228027, "eval_logps/chosen": -28.865020751953125, "eval_logps/rejected": -219.72817993164062, "eval_loss": 0.3525654077529907, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.02794935740530491, "eval_rewards/margins": 1.8650753498077393, "eval_rewards/rejected": -1.8371257781982422, "eval_runtime": 2.5392, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 2400 }, { "epoch": 0.1, "learning_rate": 4.8200000000000004e-06, "logits/chosen": -2.930541515350342, "logits/rejected": -2.9411559104919434, "logps/chosen": -3.1964287757873535, "logps/rejected": -255.1358184814453, "loss": 0.0917, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28316301107406616, "rewards/margins": 2.48319411277771, "rewards/rejected": -2.200031042098999, "step": 2410 }, { "epoch": 0.1, "learning_rate": 4.84e-06, "logits/chosen": -2.922476053237915, "logits/rejected": -2.936741352081299, "logps/chosen": -0.4417743682861328, "logps/rejected": -266.0160217285156, "loss": 0.071, "rewards/accuracies": 1.0, "rewards/chosen": 0.3114129900932312, "rewards/margins": 2.6157948970794678, "rewards/rejected": -2.304381847381592, "step": 2420 }, { "epoch": 0.1, "learning_rate": 4.86e-06, "logits/chosen": -2.9151620864868164, "logits/rejected": -2.9280247688293457, "logps/chosen": -8.3615140914917, "logps/rejected": -251.51171875, "loss": 0.1506, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23205485939979553, "rewards/margins": 2.39455246925354, "rewards/rejected": -2.1624975204467773, "step": 2430 }, { "epoch": 0.1, "learning_rate": 4.880000000000001e-06, "logits/chosen": -2.9263792037963867, "logits/rejected": -2.9378440380096436, "logps/chosen": -1.7975679636001587, "logps/rejected": -256.6875915527344, "loss": 0.0894, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2985270619392395, "rewards/margins": 2.5087318420410156, "rewards/rejected": -2.210204839706421, "step": 2440 }, { "epoch": 0.1, "learning_rate": 4.9000000000000005e-06, "logits/chosen": -2.916597366333008, "logits/rejected": -2.9286980628967285, "logps/chosen": -10.32237720489502, "logps/rejected": -256.8524169921875, "loss": 0.1691, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21249043941497803, "rewards/margins": 2.432523012161255, "rewards/rejected": -2.2200324535369873, "step": 2450 }, { "epoch": 0.1, "learning_rate": 4.92e-06, "logits/chosen": -2.9199366569519043, "logits/rejected": -2.9309046268463135, "logps/chosen": -8.556720733642578, "logps/rejected": -243.2650604248047, "loss": 0.1729, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23059014976024628, "rewards/margins": 2.3118815422058105, "rewards/rejected": -2.081291675567627, "step": 2460 }, { "epoch": 0.1, "learning_rate": 4.94e-06, "logits/chosen": -2.935798168182373, "logits/rejected": -2.9477298259735107, "logps/chosen": -3.15940260887146, "logps/rejected": -259.3836975097656, "loss": 0.105, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2848740220069885, "rewards/margins": 2.5266075134277344, "rewards/rejected": -2.2417335510253906, "step": 2470 }, { "epoch": 0.1, "learning_rate": 4.960000000000001e-06, "logits/chosen": -2.9360594749450684, "logits/rejected": -2.9439454078674316, "logps/chosen": -2.6440885066986084, "logps/rejected": -259.58563232421875, "loss": 0.0975, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2908412218093872, "rewards/margins": 2.5330803394317627, "rewards/rejected": -2.242238998413086, "step": 2480 }, { "epoch": 0.1, "learning_rate": 4.980000000000001e-06, "logits/chosen": -2.893969774246216, "logits/rejected": -2.9010419845581055, "logps/chosen": -3.0619242191314697, "logps/rejected": -258.99749755859375, "loss": 0.0915, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28488820791244507, "rewards/margins": 2.5238099098205566, "rewards/rejected": -2.238921642303467, "step": 2490 }, { "epoch": 0.1, "learning_rate": 5e-06, "logits/chosen": -2.920306444168091, "logits/rejected": -2.926865339279175, "logps/chosen": -6.128819465637207, "logps/rejected": -256.3418884277344, "loss": 0.132, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25361496210098267, "rewards/margins": 2.4651331901550293, "rewards/rejected": -2.2115180492401123, "step": 2500 }, { "epoch": 0.1, "eval_logits/chosen": -2.9774951934814453, "eval_logits/rejected": -2.9840028285980225, "eval_logps/chosen": -15.314695358276367, "eval_logps/rejected": -230.94912719726562, "eval_loss": 0.20295581221580505, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.16345258057117462, "eval_rewards/margins": 2.112787961959839, "eval_rewards/rejected": -1.9493356943130493, "eval_runtime": 2.5464, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.393, "step": 2500 }, { "epoch": 0.1, "learning_rate": 4.999997563061038e-06, "logits/chosen": -2.931420087814331, "logits/rejected": -2.942343235015869, "logps/chosen": -4.019248962402344, "logps/rejected": -258.52606201171875, "loss": 0.1068, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27781176567077637, "rewards/margins": 2.508692502975464, "rewards/rejected": -2.2308807373046875, "step": 2510 }, { "epoch": 0.1, "learning_rate": 4.999990252248902e-06, "logits/chosen": -2.9369263648986816, "logits/rejected": -2.949777126312256, "logps/chosen": -5.556292533874512, "logps/rejected": -259.5150146484375, "loss": 0.1223, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2618443965911865, "rewards/margins": 2.5028815269470215, "rewards/rejected": -2.241036891937256, "step": 2520 }, { "epoch": 0.1, "learning_rate": 4.999978067577844e-06, "logits/chosen": -2.9553375244140625, "logits/rejected": -2.9664769172668457, "logps/chosen": -6.690278053283691, "logps/rejected": -262.77862548828125, "loss": 0.1343, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25276094675064087, "rewards/margins": 2.524426221847534, "rewards/rejected": -2.271665096282959, "step": 2530 }, { "epoch": 0.1, "learning_rate": 4.999961009071621e-06, "logits/chosen": -2.90276837348938, "logits/rejected": -2.9128100872039795, "logps/chosen": -2.8904833793640137, "logps/rejected": -262.2496337890625, "loss": 0.1001, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28965675830841064, "rewards/margins": 2.555020332336426, "rewards/rejected": -2.2653632164001465, "step": 2540 }, { "epoch": 0.1, "learning_rate": 4.999939076763487e-06, "logits/chosen": -2.9190592765808105, "logits/rejected": -2.9320576190948486, "logps/chosen": -1.011108160018921, "logps/rejected": -265.8113708496094, "loss": 0.0736, "rewards/accuracies": 1.0, "rewards/chosen": 0.3051157593727112, "rewards/margins": 2.610159397125244, "rewards/rejected": -2.3050436973571777, "step": 2550 }, { "epoch": 0.1, "learning_rate": 4.999912270696202e-06, "logits/chosen": -2.90120267868042, "logits/rejected": -2.9128077030181885, "logps/chosen": -3.130305528640747, "logps/rejected": -261.26593017578125, "loss": 0.1029, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2844390273094177, "rewards/margins": 2.543980121612549, "rewards/rejected": -2.2595407962799072, "step": 2560 }, { "epoch": 0.1, "learning_rate": 4.999880590922025e-06, "logits/chosen": -2.89697265625, "logits/rejected": -2.9100916385650635, "logps/chosen": -0.6354149580001831, "logps/rejected": -267.5892028808594, "loss": 0.0712, "rewards/accuracies": 1.0, "rewards/chosen": 0.31009534001350403, "rewards/margins": 2.6312198638916016, "rewards/rejected": -2.32112455368042, "step": 2570 }, { "epoch": 0.1, "learning_rate": 4.999844037502717e-06, "logits/chosen": -2.9104385375976562, "logits/rejected": -2.9229159355163574, "logps/chosen": -0.8323253393173218, "logps/rejected": -262.0885009765625, "loss": 0.083, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3101959824562073, "rewards/margins": 2.571075916290283, "rewards/rejected": -2.2608799934387207, "step": 2580 }, { "epoch": 0.1, "learning_rate": 4.999802610509541e-06, "logits/chosen": -2.9474949836730957, "logits/rejected": -2.959007740020752, "logps/chosen": -3.585397243499756, "logps/rejected": -259.90423583984375, "loss": 0.1083, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2829609811306, "rewards/margins": 2.5275044441223145, "rewards/rejected": -2.2445435523986816, "step": 2590 }, { "epoch": 0.1, "learning_rate": 4.999756310023261e-06, "logits/chosen": -2.9085545539855957, "logits/rejected": -2.9205751419067383, "logps/chosen": -1.9085439443588257, "logps/rejected": -264.78570556640625, "loss": 0.0851, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29713019728660583, "rewards/margins": 2.5920023918151855, "rewards/rejected": -2.2948720455169678, "step": 2600 }, { "epoch": 0.1, "eval_logits/chosen": -2.982832670211792, "eval_logits/rejected": -2.9916560649871826, "eval_logps/chosen": -2.6289947032928467, "eval_logps/rejected": -227.37228393554688, "eval_loss": 0.16876302659511566, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.2903096079826355, "eval_rewards/margins": 2.2038769721984863, "eval_rewards/rejected": -1.9135675430297852, "eval_runtime": 2.5525, "eval_samples_per_second": 1.959, "eval_steps_per_second": 0.392, "step": 2600 }, { "epoch": 0.1, "learning_rate": 4.999705136134143e-06, "logits/chosen": -2.936011791229248, "logits/rejected": -2.945693254470825, "logps/chosen": -2.083970546722412, "logps/rejected": -266.7441101074219, "loss": 0.0902, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2943379282951355, "rewards/margins": 2.611631393432617, "rewards/rejected": -2.317293405532837, "step": 2610 }, { "epoch": 0.1, "learning_rate": 4.999649088941951e-06, "logits/chosen": -2.9587647914886475, "logits/rejected": -2.970644474029541, "logps/chosen": -3.445868968963623, "logps/rejected": -261.3812255859375, "loss": 0.1074, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2826182246208191, "rewards/margins": 2.5397777557373047, "rewards/rejected": -2.2571589946746826, "step": 2620 }, { "epoch": 0.11, "learning_rate": 4.999588168555954e-06, "logits/chosen": -2.935114860534668, "logits/rejected": -2.945122241973877, "logps/chosen": -4.4407243728637695, "logps/rejected": -260.07171630859375, "loss": 0.1078, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27256685495376587, "rewards/margins": 2.5173683166503906, "rewards/rejected": -2.2448012828826904, "step": 2630 }, { "epoch": 0.11, "learning_rate": 4.99952237509492e-06, "logits/chosen": -2.924142837524414, "logits/rejected": -2.9340734481811523, "logps/chosen": -1.8130245208740234, "logps/rejected": -269.7516174316406, "loss": 0.0711, "rewards/accuracies": 1.0, "rewards/chosen": 0.2991049885749817, "rewards/margins": 2.646573066711426, "rewards/rejected": -2.347468376159668, "step": 2640 }, { "epoch": 0.11, "learning_rate": 4.999451708687114e-06, "logits/chosen": -2.9283523559570312, "logits/rejected": -2.93810772895813, "logps/chosen": -17.27212905883789, "logps/rejected": -251.00045776367188, "loss": 0.2388, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.14252153038978577, "rewards/margins": 2.3024232387542725, "rewards/rejected": -2.1599018573760986, "step": 2650 }, { "epoch": 0.11, "learning_rate": 4.999376169470306e-06, "logits/chosen": -2.943747043609619, "logits/rejected": -2.9547934532165527, "logps/chosen": -0.11216460168361664, "logps/rejected": -267.51287841796875, "loss": 0.0708, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138822913169861, "rewards/margins": 2.638270378112793, "rewards/rejected": -2.324388265609741, "step": 2660 }, { "epoch": 0.11, "learning_rate": 4.999295757591762e-06, "logits/chosen": -2.9261958599090576, "logits/rejected": -2.9366745948791504, "logps/chosen": -0.551460325717926, "logps/rejected": -263.65411376953125, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": 0.31261593103408813, "rewards/margins": 2.5933632850646973, "rewards/rejected": -2.280747175216675, "step": 2670 }, { "epoch": 0.11, "learning_rate": 4.99921047320825e-06, "logits/chosen": -2.9270405769348145, "logits/rejected": -2.93703556060791, "logps/chosen": -8.807626724243164, "logps/rejected": -261.7132263183594, "loss": 0.1497, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22682873904705048, "rewards/margins": 2.4943718910217285, "rewards/rejected": -2.267543315887451, "step": 2680 }, { "epoch": 0.11, "learning_rate": 4.9991203164860365e-06, "logits/chosen": -2.92804217338562, "logits/rejected": -2.939551591873169, "logps/chosen": -10.753358840942383, "logps/rejected": -253.6354217529297, "loss": 0.179, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2124747484922409, "rewards/margins": 2.3888869285583496, "rewards/rejected": -2.1764121055603027, "step": 2690 }, { "epoch": 0.11, "learning_rate": 4.999025287600886e-06, "logits/chosen": -2.9464733600616455, "logits/rejected": -2.957101821899414, "logps/chosen": -8.260920524597168, "logps/rejected": -255.9329071044922, "loss": 0.1549, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23642060160636902, "rewards/margins": 2.436845302581787, "rewards/rejected": -2.2004246711730957, "step": 2700 }, { "epoch": 0.11, "eval_logits/chosen": -2.9883482456207275, "eval_logits/rejected": -2.9974241256713867, "eval_logps/chosen": -3.7234089374542236, "eval_logps/rejected": -228.5042724609375, "eval_loss": 0.17531220614910126, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.2793654501438141, "eval_rewards/margins": 2.2042524814605713, "eval_rewards/rejected": -1.92488694190979, "eval_runtime": 2.5459, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.393, "step": 2700 }, { "epoch": 0.11, "learning_rate": 4.998925386738063e-06, "logits/chosen": -2.9557876586914062, "logits/rejected": -2.9700448513031006, "logps/chosen": -1.079310417175293, "logps/rejected": -266.4962463378906, "loss": 0.0816, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3053280711174011, "rewards/margins": 2.612246036529541, "rewards/rejected": -2.306917905807495, "step": 2710 }, { "epoch": 0.11, "learning_rate": 4.998820614092328e-06, "logits/chosen": -2.9825539588928223, "logits/rejected": -2.996671438217163, "logps/chosen": -2.0379600524902344, "logps/rejected": -262.11639404296875, "loss": 0.0961, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29784300923347473, "rewards/margins": 2.561117649078369, "rewards/rejected": -2.263274669647217, "step": 2720 }, { "epoch": 0.11, "learning_rate": 4.998710969867942e-06, "logits/chosen": -2.9530506134033203, "logits/rejected": -2.9670920372009277, "logps/chosen": -2.818333864212036, "logps/rejected": -264.53497314453125, "loss": 0.0981, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2910110354423523, "rewards/margins": 2.5784239768981934, "rewards/rejected": -2.2874131202697754, "step": 2730 }, { "epoch": 0.11, "learning_rate": 4.998596454278661e-06, "logits/chosen": -2.9239227771759033, "logits/rejected": -2.9375576972961426, "logps/chosen": -8.601677894592285, "logps/rejected": -255.7655487060547, "loss": 0.1473, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.23032689094543457, "rewards/margins": 2.4356980323791504, "rewards/rejected": -2.205371379852295, "step": 2740 }, { "epoch": 0.11, "learning_rate": 4.99847706754774e-06, "logits/chosen": -2.9380738735198975, "logits/rejected": -2.9509501457214355, "logps/chosen": -1.807461142539978, "logps/rejected": -270.5403137207031, "loss": 0.0701, "rewards/accuracies": 1.0, "rewards/chosen": 0.298919141292572, "rewards/margins": 2.649597644805908, "rewards/rejected": -2.3506784439086914, "step": 2750 }, { "epoch": 0.11, "learning_rate": 4.998352809907928e-06, "logits/chosen": -2.929011821746826, "logits/rejected": -2.9453961849212646, "logps/chosen": -7.600255489349365, "logps/rejected": -261.4618225097656, "loss": 0.1367, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2410949170589447, "rewards/margins": 2.503282070159912, "rewards/rejected": -2.2621874809265137, "step": 2760 }, { "epoch": 0.11, "learning_rate": 4.9982236816014735e-06, "logits/chosen": -2.9435577392578125, "logits/rejected": -2.9599549770355225, "logps/chosen": -2.6569409370422363, "logps/rejected": -258.93267822265625, "loss": 0.0912, "rewards/accuracies": 1.0, "rewards/chosen": 0.2924373149871826, "rewards/margins": 2.528324604034424, "rewards/rejected": -2.2358877658843994, "step": 2770 }, { "epoch": 0.11, "learning_rate": 4.998089682880117e-06, "logits/chosen": -2.9482903480529785, "logits/rejected": -2.959702730178833, "logps/chosen": -0.9188389778137207, "logps/rejected": -267.1348571777344, "loss": 0.0754, "rewards/accuracies": 1.0, "rewards/chosen": 0.30619946122169495, "rewards/margins": 2.6256141662597656, "rewards/rejected": -2.3194148540496826, "step": 2780 }, { "epoch": 0.11, "learning_rate": 4.997950814005098e-06, "logits/chosen": -2.9309134483337402, "logits/rejected": -2.9453907012939453, "logps/chosen": -3.307797908782959, "logps/rejected": -270.1351318359375, "loss": 0.0788, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28362271189689636, "rewards/margins": 2.6312460899353027, "rewards/rejected": -2.347623348236084, "step": 2790 }, { "epoch": 0.11, "learning_rate": 4.997807075247147e-06, "logits/chosen": -2.918440103530884, "logits/rejected": -2.936917781829834, "logps/chosen": -2.3056654930114746, "logps/rejected": -271.310302734375, "loss": 0.0822, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2924153506755829, "rewards/margins": 2.651768207550049, "rewards/rejected": -2.3593530654907227, "step": 2800 }, { "epoch": 0.11, "eval_logits/chosen": -2.9947869777679443, "eval_logits/rejected": -3.0047881603240967, "eval_logps/chosen": -12.332693099975586, "eval_logps/rejected": -229.33737182617188, "eval_loss": 0.22239509224891663, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.19327259063720703, "eval_rewards/margins": 2.126490831375122, "eval_rewards/rejected": -1.933218240737915, "eval_runtime": 2.5386, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 2800 }, { "epoch": 0.11, "learning_rate": 4.997658466886489e-06, "logits/chosen": -2.9303290843963623, "logits/rejected": -2.9483509063720703, "logps/chosen": -3.8485920429229736, "logps/rejected": -263.7492980957031, "loss": 0.1065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2781464159488678, "rewards/margins": 2.5598902702331543, "rewards/rejected": -2.2817435264587402, "step": 2810 }, { "epoch": 0.11, "learning_rate": 4.997504989212846e-06, "logits/chosen": -2.9323229789733887, "logits/rejected": -2.9504528045654297, "logps/chosen": -8.75068473815918, "logps/rejected": -259.11676025390625, "loss": 0.1581, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22879405319690704, "rewards/margins": 2.4686357975006104, "rewards/rejected": -2.2398416996002197, "step": 2820 }, { "epoch": 0.11, "learning_rate": 4.997346642525429e-06, "logits/chosen": -2.933093309402466, "logits/rejected": -2.949470043182373, "logps/chosen": -1.0979712009429932, "logps/rejected": -269.8417663574219, "loss": 0.072, "rewards/accuracies": 1.0, "rewards/chosen": 0.30372458696365356, "rewards/margins": 2.6482279300689697, "rewards/rejected": -2.344503402709961, "step": 2830 }, { "epoch": 0.11, "learning_rate": 4.997183427132943e-06, "logits/chosen": -2.975076675415039, "logits/rejected": -2.9893410205841064, "logps/chosen": -2.3973422050476074, "logps/rejected": -268.19708251953125, "loss": 0.09, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29400795698165894, "rewards/margins": 2.6217880249023438, "rewards/rejected": -2.32778000831604, "step": 2840 }, { "epoch": 0.11, "learning_rate": 4.9970153433535855e-06, "logits/chosen": -2.929691791534424, "logits/rejected": -2.946018695831299, "logps/chosen": -0.8474863767623901, "logps/rejected": -271.335693359375, "loss": 0.0692, "rewards/accuracies": 1.0, "rewards/chosen": 0.3078172504901886, "rewards/margins": 2.665538787841797, "rewards/rejected": -2.3577218055725098, "step": 2850 }, { "epoch": 0.11, "learning_rate": 4.996842391515045e-06, "logits/chosen": -2.9057459831237793, "logits/rejected": -2.924971342086792, "logps/chosen": -3.9829726219177246, "logps/rejected": -269.81866455078125, "loss": 0.0929, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2774185240268707, "rewards/margins": 2.620413303375244, "rewards/rejected": -2.3429949283599854, "step": 2860 }, { "epoch": 0.11, "learning_rate": 4.996664571954497e-06, "logits/chosen": -2.924872398376465, "logits/rejected": -2.944304943084717, "logps/chosen": -1.297390341758728, "logps/rejected": -266.0436096191406, "loss": 0.0869, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30283477902412415, "rewards/margins": 2.608426332473755, "rewards/rejected": -2.305591583251953, "step": 2870 }, { "epoch": 0.12, "learning_rate": 4.996481885018613e-06, "logits/chosen": -2.93523907661438, "logits/rejected": -2.9543473720550537, "logps/chosen": -0.2802279591560364, "logps/rejected": -274.71221923828125, "loss": 0.0657, "rewards/accuracies": 1.0, "rewards/chosen": 0.31615376472473145, "rewards/margins": 2.705115556716919, "rewards/rejected": -2.3889615535736084, "step": 2880 }, { "epoch": 0.12, "learning_rate": 4.99629433106355e-06, "logits/chosen": -2.9099326133728027, "logits/rejected": -2.9301187992095947, "logps/chosen": -3.066620349884033, "logps/rejected": -264.38299560546875, "loss": 0.0943, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2856389582157135, "rewards/margins": 2.5741052627563477, "rewards/rejected": -2.288466215133667, "step": 2890 }, { "epoch": 0.12, "learning_rate": 4.996101910454953e-06, "logits/chosen": -2.9378252029418945, "logits/rejected": -2.9566726684570312, "logps/chosen": -2.199982166290283, "logps/rejected": -269.46044921875, "loss": 0.0862, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29553502798080444, "rewards/margins": 2.6332554817199707, "rewards/rejected": -2.3377203941345215, "step": 2900 }, { "epoch": 0.12, "eval_logits/chosen": -2.9997313022613525, "eval_logits/rejected": -3.0074756145477295, "eval_logps/chosen": -9.748120307922363, "eval_logps/rejected": -230.3155059814453, "eval_loss": 0.2088640034198761, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.21911832690238953, "eval_rewards/margins": 2.1621179580688477, "eval_rewards/rejected": -1.9429994821548462, "eval_runtime": 2.5398, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 2900 }, { "epoch": 0.12, "learning_rate": 4.995904623567956e-06, "logits/chosen": -2.9312145709991455, "logits/rejected": -2.9481277465820312, "logps/chosen": -3.5801608562469482, "logps/rejected": -272.2450866699219, "loss": 0.0994, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28452882170677185, "rewards/margins": 2.6526331901550293, "rewards/rejected": -2.3681044578552246, "step": 2910 }, { "epoch": 0.12, "learning_rate": 4.99570247078718e-06, "logits/chosen": -2.932469129562378, "logits/rejected": -2.9480414390563965, "logps/chosen": -9.139230728149414, "logps/rejected": -265.5406494140625, "loss": 0.1553, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22564582526683807, "rewards/margins": 2.5279226303100586, "rewards/rejected": -2.302276611328125, "step": 2920 }, { "epoch": 0.12, "learning_rate": 4.995495452506733e-06, "logits/chosen": -2.964989423751831, "logits/rejected": -2.9798190593719482, "logps/chosen": -0.1591329127550125, "logps/rejected": -273.202392578125, "loss": 0.0676, "rewards/accuracies": 1.0, "rewards/chosen": 0.3130447268486023, "rewards/margins": 2.6881723403930664, "rewards/rejected": -2.3751275539398193, "step": 2930 }, { "epoch": 0.12, "learning_rate": 4.995283569130207e-06, "logits/chosen": -2.953829526901245, "logits/rejected": -2.9684512615203857, "logps/chosen": -5.145502090454102, "logps/rejected": -267.84967041015625, "loss": 0.1152, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2645096182823181, "rewards/margins": 2.58903431892395, "rewards/rejected": -2.3245246410369873, "step": 2940 }, { "epoch": 0.12, "learning_rate": 4.9950668210706795e-06, "logits/chosen": -2.958111524581909, "logits/rejected": -2.977039098739624, "logps/chosen": -3.6317760944366455, "logps/rejected": -268.91680908203125, "loss": 0.1021, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27986711263656616, "rewards/margins": 2.6198172569274902, "rewards/rejected": -2.3399498462677, "step": 2950 }, { "epoch": 0.12, "learning_rate": 4.9948452087507114e-06, "logits/chosen": -2.9722390174865723, "logits/rejected": -2.9894328117370605, "logps/chosen": -4.185362815856934, "logps/rejected": -264.9282531738281, "loss": 0.1124, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27568769454956055, "rewards/margins": 2.5699996948242188, "rewards/rejected": -2.294312000274658, "step": 2960 }, { "epoch": 0.12, "learning_rate": 4.994618732602349e-06, "logits/chosen": -2.9579081535339355, "logits/rejected": -2.975651979446411, "logps/chosen": -0.834877610206604, "logps/rejected": -273.98687744140625, "loss": 0.069, "rewards/accuracies": 1.0, "rewards/chosen": 0.31072917580604553, "rewards/margins": 2.6930480003356934, "rewards/rejected": -2.3823189735412598, "step": 2970 }, { "epoch": 0.12, "learning_rate": 4.9943873930671175e-06, "logits/chosen": -2.953207015991211, "logits/rejected": -2.969761610031128, "logps/chosen": -5.444631099700928, "logps/rejected": -271.6313171386719, "loss": 0.1079, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2642351984977722, "rewards/margins": 2.6254377365112305, "rewards/rejected": -2.3612027168273926, "step": 2980 }, { "epoch": 0.12, "learning_rate": 4.994151190596025e-06, "logits/chosen": -2.939422607421875, "logits/rejected": -2.956604242324829, "logps/chosen": -0.5892351269721985, "logps/rejected": -272.9173889160156, "loss": 0.0682, "rewards/accuracies": 1.0, "rewards/chosen": 0.3119525909423828, "rewards/margins": 2.6897451877593994, "rewards/rejected": -2.3777928352355957, "step": 2990 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -2.956782102584839, "logits/rejected": -2.97273325920105, "logps/chosen": -1.2262976169586182, "logps/rejected": -272.7721862792969, "loss": 0.0702, "rewards/accuracies": 1.0, "rewards/chosen": 0.30023688077926636, "rewards/margins": 2.6746017932891846, "rewards/rejected": -2.3743646144866943, "step": 3000 }, { "epoch": 0.12, "eval_logits/chosen": -3.00478458404541, "eval_logits/rejected": -3.0130059719085693, "eval_logps/chosen": -21.330486297607422, "eval_logps/rejected": -232.3101806640625, "eval_loss": 0.273472398519516, "eval_rewards/accuracies": 0.800000011920929, "eval_rewards/chosen": 0.10329467058181763, "eval_rewards/margins": 2.0662410259246826, "eval_rewards/rejected": -1.9629461765289307, "eval_runtime": 2.5404, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 3000 }, { "epoch": 0.12, "learning_rate": 4.993664198697694e-06, "logits/chosen": -2.9864158630371094, "logits/rejected": -3.0010952949523926, "logps/chosen": -8.116315841674805, "logps/rejected": -265.9324951171875, "loss": 0.1472, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2348651885986328, "rewards/margins": 2.5436851978302, "rewards/rejected": -2.3088200092315674, "step": 3010 }, { "epoch": 0.12, "learning_rate": 4.993413410219872e-06, "logits/chosen": -2.947815418243408, "logits/rejected": -2.9655261039733887, "logps/chosen": -6.833758354187012, "logps/rejected": -270.7105407714844, "loss": 0.1293, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2472001612186432, "rewards/margins": 2.6041948795318604, "rewards/rejected": -2.35699462890625, "step": 3020 }, { "epoch": 0.12, "learning_rate": 4.993157760705018e-06, "logits/chosen": -2.9514195919036865, "logits/rejected": -2.9720423221588135, "logps/chosen": -0.18666866421699524, "logps/rejected": -270.39300537109375, "loss": 0.0708, "rewards/accuracies": 1.0, "rewards/chosen": 0.3131788372993469, "rewards/margins": 2.662532329559326, "rewards/rejected": -2.349353551864624, "step": 3030 }, { "epoch": 0.12, "learning_rate": 4.992897250651535e-06, "logits/chosen": -2.962658405303955, "logits/rejected": -2.9811949729919434, "logps/chosen": -0.056405920535326004, "logps/rejected": -276.2437438964844, "loss": 0.0649, "rewards/accuracies": 1.0, "rewards/chosen": 0.31692516803741455, "rewards/margins": 2.720716714859009, "rewards/rejected": -2.403791666030884, "step": 3040 }, { "epoch": 0.12, "learning_rate": 4.992631880567301e-06, "logits/chosen": -2.9683456420898438, "logits/rejected": -2.982821226119995, "logps/chosen": -6.694758415222168, "logps/rejected": -266.7032775878906, "loss": 0.1327, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24962279200553894, "rewards/margins": 2.564990520477295, "rewards/rejected": -2.3153676986694336, "step": 3050 }, { "epoch": 0.12, "learning_rate": 4.992361650969668e-06, "logits/chosen": -2.9305052757263184, "logits/rejected": -2.94580340385437, "logps/chosen": -0.4554418623447418, "logps/rejected": -274.55535888671875, "loss": 0.0665, "rewards/accuracies": 1.0, "rewards/chosen": 0.31094008684158325, "rewards/margins": 2.703557014465332, "rewards/rejected": -2.3926172256469727, "step": 3060 }, { "epoch": 0.12, "learning_rate": 4.992086562385462e-06, "logits/chosen": -2.962387800216675, "logits/rejected": -2.97969126701355, "logps/chosen": -4.890730381011963, "logps/rejected": -266.93536376953125, "loss": 0.1012, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2670697569847107, "rewards/margins": 2.5857291221618652, "rewards/rejected": -2.318659543991089, "step": 3070 }, { "epoch": 0.12, "learning_rate": 4.9918066153509835e-06, "logits/chosen": -2.9641566276550293, "logits/rejected": -2.980417490005493, "logps/chosen": -3.983004331588745, "logps/rejected": -263.8308410644531, "loss": 0.1051, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2755580544471741, "rewards/margins": 2.5621650218963623, "rewards/rejected": -2.286607027053833, "step": 3080 }, { "epoch": 0.12, "learning_rate": 4.9915218104120024e-06, "logits/chosen": -2.9254164695739746, "logits/rejected": -2.9469523429870605, "logps/chosen": -2.478398561477661, "logps/rejected": -270.51849365234375, "loss": 0.0778, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29025134444236755, "rewards/margins": 2.6427130699157715, "rewards/rejected": -2.352461576461792, "step": 3090 }, { "epoch": 0.12, "learning_rate": 4.9912321481237616e-06, "logits/chosen": -2.9281842708587646, "logits/rejected": -2.949068546295166, "logps/chosen": -2.2854721546173096, "logps/rejected": -270.4503479003906, "loss": 0.0865, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2935202419757843, "rewards/margins": 2.6457607746124268, "rewards/rejected": -2.352240562438965, "step": 3100 }, { "epoch": 0.12, "eval_logits/chosen": -3.005786418914795, "eval_logits/rejected": -3.022015333175659, "eval_logps/chosen": -0.04936308413743973, "eval_logps/rejected": -264.8221740722656, "eval_loss": 0.07540267705917358, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3161059021949768, "eval_rewards/margins": 2.6041717529296875, "eval_rewards/rejected": -2.2880656719207764, "eval_runtime": 2.5543, "eval_samples_per_second": 1.957, "eval_steps_per_second": 0.391, "step": 3100 }, { "epoch": 0.12, "learning_rate": 4.990937629050972e-06, "logits/chosen": -2.954192638397217, "logits/rejected": -2.975335121154785, "logps/chosen": -5.2534661293029785, "logps/rejected": -272.81903076171875, "loss": 0.1053, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2643756568431854, "rewards/margins": 2.6343226432800293, "rewards/rejected": -2.3699467182159424, "step": 3110 }, { "epoch": 0.12, "learning_rate": 4.990638253767812e-06, "logits/chosen": -2.943417549133301, "logits/rejected": -2.9635746479034424, "logps/chosen": -4.4543352127075195, "logps/rejected": -272.0033874511719, "loss": 0.101, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2723621129989624, "rewards/margins": 2.6394670009613037, "rewards/rejected": -2.367105007171631, "step": 3120 }, { "epoch": 0.13, "learning_rate": 4.990334022857932e-06, "logits/chosen": -2.9342548847198486, "logits/rejected": -2.9533886909484863, "logps/chosen": -2.7687125205993652, "logps/rejected": -268.9676208496094, "loss": 0.0979, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.29004448652267456, "rewards/margins": 2.6234688758850098, "rewards/rejected": -2.3334240913391113, "step": 3130 }, { "epoch": 0.13, "learning_rate": 4.9900249369144435e-06, "logits/chosen": -2.9602808952331543, "logits/rejected": -2.9787139892578125, "logps/chosen": -3.531574249267578, "logps/rejected": -273.08837890625, "loss": 0.0987, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27913835644721985, "rewards/margins": 2.6584808826446533, "rewards/rejected": -2.379342555999756, "step": 3140 }, { "epoch": 0.13, "learning_rate": 4.989710996539926e-06, "logits/chosen": -2.9741334915161133, "logits/rejected": -2.9930593967437744, "logps/chosen": -0.07728681713342667, "logps/rejected": -278.52587890625, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": 0.3196086883544922, "rewards/margins": 2.7431468963623047, "rewards/rejected": -2.4235379695892334, "step": 3150 }, { "epoch": 0.13, "learning_rate": 4.989392202346423e-06, "logits/chosen": -2.9623122215270996, "logits/rejected": -2.9810168743133545, "logps/chosen": -0.06675361096858978, "logps/rejected": -275.8570251464844, "loss": 0.0677, "rewards/accuracies": 1.0, "rewards/chosen": 0.3165315091609955, "rewards/margins": 2.720493793487549, "rewards/rejected": -2.4039623737335205, "step": 3160 }, { "epoch": 0.13, "learning_rate": 4.98906855495544e-06, "logits/chosen": -2.9494965076446533, "logits/rejected": -2.9661126136779785, "logps/chosen": -1.9203599691390991, "logps/rejected": -269.1220703125, "loss": 0.0813, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29769840836524963, "rewards/margins": 2.634481906890869, "rewards/rejected": -2.3367831707000732, "step": 3170 }, { "epoch": 0.13, "learning_rate": 4.988740054997943e-06, "logits/chosen": -2.9422767162323, "logits/rejected": -2.959115505218506, "logps/chosen": -4.798096656799316, "logps/rejected": -266.10833740234375, "loss": 0.1145, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26633527874946594, "rewards/margins": 2.5749001502990723, "rewards/rejected": -2.3085646629333496, "step": 3180 }, { "epoch": 0.13, "learning_rate": 4.98840670311436e-06, "logits/chosen": -2.949901819229126, "logits/rejected": -2.9680442810058594, "logps/chosen": -1.9144941568374634, "logps/rejected": -271.45037841796875, "loss": 0.0836, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29651060700416565, "rewards/margins": 2.6560425758361816, "rewards/rejected": -2.359531879425049, "step": 3190 }, { "epoch": 0.13, "learning_rate": 4.988068499954578e-06, "logits/chosen": -2.9539952278137207, "logits/rejected": -2.9729628562927246, "logps/chosen": -0.2918039858341217, "logps/rejected": -267.5617370605469, "loss": 0.0763, "rewards/accuracies": 1.0, "rewards/chosen": 0.3125467896461487, "rewards/margins": 2.634411334991455, "rewards/rejected": -2.321864604949951, "step": 3200 }, { "epoch": 0.13, "eval_logits/chosen": -3.013660192489624, "eval_logits/rejected": -3.0278830528259277, "eval_logps/chosen": -0.04352743551135063, "eval_logps/rejected": -265.8852844238281, "eval_loss": 0.07446904480457306, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3161642551422119, "eval_rewards/margins": 2.614861249923706, "eval_rewards/rejected": -2.298696994781494, "eval_runtime": 2.5397, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 3200 }, { "epoch": 0.13, "learning_rate": 4.987725446177941e-06, "logits/chosen": -2.9559671878814697, "logits/rejected": -2.971579074859619, "logps/chosen": -7.344213008880615, "logps/rejected": -269.2698059082031, "loss": 0.1317, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2414952963590622, "rewards/margins": 2.5867764949798584, "rewards/rejected": -2.3452811241149902, "step": 3210 }, { "epoch": 0.13, "learning_rate": 4.9873775424532515e-06, "logits/chosen": -2.9371516704559326, "logits/rejected": -2.954184055328369, "logps/chosen": -10.419638633728027, "logps/rejected": -265.3689270019531, "loss": 0.1632, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21335700154304504, "rewards/margins": 2.513478994369507, "rewards/rejected": -2.300121784210205, "step": 3220 }, { "epoch": 0.13, "learning_rate": 4.987024789458762e-06, "logits/chosen": -2.972285509109497, "logits/rejected": -2.990304470062256, "logps/chosen": -1.279715657234192, "logps/rejected": -272.82318115234375, "loss": 0.0796, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3047386705875397, "rewards/margins": 2.6811931133270264, "rewards/rejected": -2.3764541149139404, "step": 3230 }, { "epoch": 0.13, "learning_rate": 4.986667187882186e-06, "logits/chosen": -2.967745065689087, "logits/rejected": -2.981602907180786, "logps/chosen": -3.183326005935669, "logps/rejected": -270.27630615234375, "loss": 0.0997, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28593534231185913, "rewards/margins": 2.631054162979126, "rewards/rejected": -2.345118761062622, "step": 3240 }, { "epoch": 0.13, "learning_rate": 4.986304738420684e-06, "logits/chosen": -2.9633469581604004, "logits/rejected": -2.977207899093628, "logps/chosen": -0.0825023278594017, "logps/rejected": -276.0854797363281, "loss": 0.065, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152485489845276, "rewards/margins": 2.723616361618042, "rewards/rejected": -2.4083681106567383, "step": 3250 }, { "epoch": 0.13, "learning_rate": 4.98593744178087e-06, "logits/chosen": -2.9854626655578613, "logits/rejected": -2.9981493949890137, "logps/chosen": -0.4490409791469574, "logps/rejected": -277.5818786621094, "loss": 0.065, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150614798069, "rewards/margins": 2.7327699661254883, "rewards/rejected": -2.4177086353302, "step": 3260 }, { "epoch": 0.13, "learning_rate": 4.985565298678809e-06, "logits/chosen": -2.9477486610412598, "logits/rejected": -2.9621667861938477, "logps/chosen": -6.811443328857422, "logps/rejected": -271.9598693847656, "loss": 0.1283, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24910572171211243, "rewards/margins": 2.61403226852417, "rewards/rejected": -2.36492657661438, "step": 3270 }, { "epoch": 0.13, "learning_rate": 4.985188309840012e-06, "logits/chosen": -2.9708569049835205, "logits/rejected": -2.9844272136688232, "logps/chosen": -0.11964855343103409, "logps/rejected": -275.98614501953125, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": 0.31647297739982605, "rewards/margins": 2.7184948921203613, "rewards/rejected": -2.402021884918213, "step": 3280 }, { "epoch": 0.13, "learning_rate": 4.984806475999437e-06, "logits/chosen": -2.980682611465454, "logits/rejected": -2.9936306476593018, "logps/chosen": -3.130220890045166, "logps/rejected": -274.9905700683594, "loss": 0.0867, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28573402762413025, "rewards/margins": 2.6832540035247803, "rewards/rejected": -2.3975205421447754, "step": 3290 }, { "epoch": 0.13, "learning_rate": 4.984419797901491e-06, "logits/chosen": -2.94360089302063, "logits/rejected": -2.9615981578826904, "logps/chosen": -6.222545146942139, "logps/rejected": -271.21893310546875, "loss": 0.11, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25492292642593384, "rewards/margins": 2.6165261268615723, "rewards/rejected": -2.361602783203125, "step": 3300 }, { "epoch": 0.13, "eval_logits/chosen": -3.0166969299316406, "eval_logits/rejected": -3.030679702758789, "eval_logps/chosen": -0.5935611724853516, "eval_logps/rejected": -236.8128204345703, "eval_loss": 0.14638371765613556, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31066393852233887, "eval_rewards/margins": 2.318636655807495, "eval_rewards/rejected": -2.007972478866577, "eval_runtime": 2.542, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 3300 }, { "epoch": 0.13, "learning_rate": 4.984028276300021e-06, "logits/chosen": -2.973843574523926, "logits/rejected": -2.9927449226379395, "logps/chosen": -0.2193298637866974, "logps/rejected": -270.81585693359375, "loss": 0.0741, "rewards/accuracies": 1.0, "rewards/chosen": 0.31232547760009766, "rewards/margins": 2.669264316558838, "rewards/rejected": -2.3569388389587402, "step": 3310 }, { "epoch": 0.13, "learning_rate": 4.983631911958319e-06, "logits/chosen": -2.9665818214416504, "logits/rejected": -2.987959384918213, "logps/chosen": -0.20186889171600342, "logps/rejected": -279.26092529296875, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": 0.31345781683921814, "rewards/margins": 2.7552967071533203, "rewards/rejected": -2.4418389797210693, "step": 3320 }, { "epoch": 0.13, "learning_rate": 4.983230705649118e-06, "logits/chosen": -2.9665396213531494, "logits/rejected": -2.986729621887207, "logps/chosen": -2.6522958278656006, "logps/rejected": -274.2337951660156, "loss": 0.0902, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2924780249595642, "rewards/margins": 2.6752967834472656, "rewards/rejected": -2.3828189373016357, "step": 3330 }, { "epoch": 0.13, "learning_rate": 4.982824658154589e-06, "logits/chosen": -2.9660797119140625, "logits/rejected": -2.9857985973358154, "logps/chosen": -1.9801725149154663, "logps/rejected": -274.17059326171875, "loss": 0.0778, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29616373777389526, "rewards/margins": 2.6860413551330566, "rewards/rejected": -2.3898777961730957, "step": 3340 }, { "epoch": 0.13, "learning_rate": 4.9824137702663424e-06, "logits/chosen": -2.9727110862731934, "logits/rejected": -2.9906558990478516, "logps/chosen": -0.46564874053001404, "logps/rejected": -278.953369140625, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": 0.31235089898109436, "rewards/margins": 2.7491729259490967, "rewards/rejected": -2.436821937561035, "step": 3350 }, { "epoch": 0.13, "learning_rate": 4.981998042785427e-06, "logits/chosen": -2.960407018661499, "logits/rejected": -2.9824085235595703, "logps/chosen": -2.193103313446045, "logps/rejected": -272.1376647949219, "loss": 0.0934, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.29336220026016235, "rewards/margins": 2.6615943908691406, "rewards/rejected": -2.368231773376465, "step": 3360 }, { "epoch": 0.13, "learning_rate": 4.981577476522323e-06, "logits/chosen": -2.952545642852783, "logits/rejected": -2.974719524383545, "logps/chosen": -4.929967403411865, "logps/rejected": -267.60888671875, "loss": 0.1204, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2675926387310028, "rewards/margins": 2.5887351036071777, "rewards/rejected": -2.3211419582366943, "step": 3370 }, { "epoch": 0.14, "learning_rate": 4.9811520722969465e-06, "logits/chosen": -2.9535961151123047, "logits/rejected": -2.9728899002075195, "logps/chosen": -5.519834995269775, "logps/rejected": -270.03643798828125, "loss": 0.1208, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2601419985294342, "rewards/margins": 2.6086106300354004, "rewards/rejected": -2.348468542098999, "step": 3380 }, { "epoch": 0.14, "learning_rate": 4.980721830938645e-06, "logits/chosen": -2.942631721496582, "logits/rejected": -2.961989402770996, "logps/chosen": -4.645608425140381, "logps/rejected": -257.7747802734375, "loss": 0.1319, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.27081674337387085, "rewards/margins": 2.4921927452087402, "rewards/rejected": -2.2213757038116455, "step": 3390 }, { "epoch": 0.14, "learning_rate": 4.980286753286196e-06, "logits/chosen": -2.95074462890625, "logits/rejected": -2.9718101024627686, "logps/chosen": -3.2749199867248535, "logps/rejected": -263.069580078125, "loss": 0.1114, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28378161787986755, "rewards/margins": 2.562072515487671, "rewards/rejected": -2.2782909870147705, "step": 3400 }, { "epoch": 0.14, "eval_logits/chosen": -3.014766216278076, "eval_logits/rejected": -3.0295913219451904, "eval_logps/chosen": -5.14328670501709, "eval_logps/rejected": -239.39218139648438, "eval_loss": 0.15339627861976624, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.26516664028167725, "eval_rewards/margins": 2.2989330291748047, "eval_rewards/rejected": -2.033766269683838, "eval_runtime": 2.5409, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 3400 }, { "epoch": 0.14, "learning_rate": 4.979846840187804e-06, "logits/chosen": -2.940023899078369, "logits/rejected": -2.9621360301971436, "logps/chosen": -3.1855998039245605, "logps/rejected": -271.45843505859375, "loss": 0.0941, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28717708587646484, "rewards/margins": 2.6435396671295166, "rewards/rejected": -2.3563623428344727, "step": 3410 }, { "epoch": 0.14, "learning_rate": 4.979402092501104e-06, "logits/chosen": -2.9817726612091064, "logits/rejected": -3.0008304119110107, "logps/chosen": -7.34716796875, "logps/rejected": -264.15435791015625, "loss": 0.1376, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24611906707286835, "rewards/margins": 2.532275676727295, "rewards/rejected": -2.286156415939331, "step": 3420 }, { "epoch": 0.14, "learning_rate": 4.9789525110931545e-06, "logits/chosen": -2.9251327514648438, "logits/rejected": -2.9482407569885254, "logps/chosen": -0.31823164224624634, "logps/rejected": -276.3880310058594, "loss": 0.0674, "rewards/accuracies": 1.0, "rewards/chosen": 0.3117162585258484, "rewards/margins": 2.723740577697754, "rewards/rejected": -2.41202449798584, "step": 3430 }, { "epoch": 0.14, "learning_rate": 4.978498096840437e-06, "logits/chosen": -2.984823703765869, "logits/rejected": -3.0053646564483643, "logps/chosen": -3.515002489089966, "logps/rejected": -275.13116455078125, "loss": 0.0975, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.281242698431015, "rewards/margins": 2.681021213531494, "rewards/rejected": -2.3997788429260254, "step": 3440 }, { "epoch": 0.14, "learning_rate": 4.978038850628855e-06, "logits/chosen": -2.9372620582580566, "logits/rejected": -2.9623453617095947, "logps/chosen": -0.09169197082519531, "logps/rejected": -278.4825439453125, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": 0.3181647062301636, "rewards/margins": 2.7442233562469482, "rewards/rejected": -2.426058769226074, "step": 3450 }, { "epoch": 0.14, "learning_rate": 4.977574773353732e-06, "logits/chosen": -2.9674019813537598, "logits/rejected": -2.989534616470337, "logps/chosen": -0.5127516388893127, "logps/rejected": -275.28948974609375, "loss": 0.0701, "rewards/accuracies": 1.0, "rewards/chosen": 0.3114417493343353, "rewards/margins": 2.7102084159851074, "rewards/rejected": -2.3987669944763184, "step": 3460 }, { "epoch": 0.14, "learning_rate": 4.9771058659198115e-06, "logits/chosen": -3.001070261001587, "logits/rejected": -3.023266077041626, "logps/chosen": -3.6363697052001953, "logps/rejected": -276.53277587890625, "loss": 0.0973, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2822750508785248, "rewards/margins": 2.692070960998535, "rewards/rejected": -2.4097959995269775, "step": 3470 }, { "epoch": 0.14, "learning_rate": 4.976632129241253e-06, "logits/chosen": -2.9674417972564697, "logits/rejected": -2.9880144596099854, "logps/chosen": -0.6072449684143066, "logps/rejected": -279.2430725097656, "loss": 0.0631, "rewards/accuracies": 1.0, "rewards/chosen": 0.3122336268424988, "rewards/margins": 2.7497055530548096, "rewards/rejected": -2.437472105026245, "step": 3480 }, { "epoch": 0.14, "learning_rate": 4.9761535642416284e-06, "logits/chosen": -2.9824717044830322, "logits/rejected": -2.9995129108428955, "logps/chosen": -4.720705986022949, "logps/rejected": -275.31964111328125, "loss": 0.1089, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.269332617521286, "rewards/margins": 2.6724348068237305, "rewards/rejected": -2.403102397918701, "step": 3490 }, { "epoch": 0.14, "learning_rate": 4.975670171853926e-06, "logits/chosen": -2.9817614555358887, "logits/rejected": -3.000938892364502, "logps/chosen": -2.8532891273498535, "logps/rejected": -273.23956298828125, "loss": 0.0934, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2880306839942932, "rewards/margins": 2.663059711456299, "rewards/rejected": -2.3750290870666504, "step": 3500 }, { "epoch": 0.14, "eval_logits/chosen": -3.021339178085327, "eval_logits/rejected": -3.031224250793457, "eval_logps/chosen": -6.577630519866943, "eval_logps/rejected": -238.33987426757812, "eval_loss": 0.17022468149662018, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.2508232295513153, "eval_rewards/margins": 2.274066209793091, "eval_rewards/rejected": -2.0232431888580322, "eval_runtime": 2.5322, "eval_samples_per_second": 1.975, "eval_steps_per_second": 0.395, "step": 3500 }, { "epoch": 0.14, "learning_rate": 4.975181953020544e-06, "logits/chosen": -2.9884767532348633, "logits/rejected": -3.007065534591675, "logps/chosen": -0.08830885589122772, "logps/rejected": -282.2452392578125, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/chosen": 0.3174853026866913, "rewards/margins": 2.7842400074005127, "rewards/rejected": -2.46675443649292, "step": 3510 }, { "epoch": 0.14, "learning_rate": 4.97468890869329e-06, "logits/chosen": -2.982537031173706, "logits/rejected": -3.0001795291900635, "logps/chosen": -10.267989158630371, "logps/rejected": -268.7005920410156, "loss": 0.1653, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21426737308502197, "rewards/margins": 2.548938035964966, "rewards/rejected": -2.3346705436706543, "step": 3520 }, { "epoch": 0.14, "learning_rate": 4.974191039833378e-06, "logits/chosen": -2.9700169563293457, "logits/rejected": -2.9911646842956543, "logps/chosen": -2.289313793182373, "logps/rejected": -272.71478271484375, "loss": 0.0875, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2944023311138153, "rewards/margins": 2.668416738510132, "rewards/rejected": -2.3740146160125732, "step": 3530 }, { "epoch": 0.14, "learning_rate": 4.973688347411431e-06, "logits/chosen": -2.9676849842071533, "logits/rejected": -2.986800193786621, "logps/chosen": -0.26664113998413086, "logps/rejected": -272.60052490234375, "loss": 0.0697, "rewards/accuracies": 1.0, "rewards/chosen": 0.3126445412635803, "rewards/margins": 2.6871111392974854, "rewards/rejected": -2.3744664192199707, "step": 3540 }, { "epoch": 0.14, "learning_rate": 4.973180832407471e-06, "logits/chosen": -2.9810705184936523, "logits/rejected": -2.999385356903076, "logps/chosen": -1.6957324743270874, "logps/rejected": -272.8710021972656, "loss": 0.077, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2972790002822876, "rewards/margins": 2.676074981689453, "rewards/rejected": -2.378795862197876, "step": 3550 }, { "epoch": 0.14, "learning_rate": 4.972668495810927e-06, "logits/chosen": -2.968233585357666, "logits/rejected": -2.98991322517395, "logps/chosen": -5.691117286682129, "logps/rejected": -271.1932678222656, "loss": 0.1195, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25841671228408813, "rewards/margins": 2.617259979248047, "rewards/rejected": -2.3588433265686035, "step": 3560 }, { "epoch": 0.14, "learning_rate": 4.9721513386206235e-06, "logits/chosen": -2.969755172729492, "logits/rejected": -2.9932169914245605, "logps/chosen": -0.8790081143379211, "logps/rejected": -278.4128112792969, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/chosen": 0.3060380816459656, "rewards/margins": 2.739657402038574, "rewards/rejected": -2.433619737625122, "step": 3570 }, { "epoch": 0.14, "learning_rate": 4.971629361844785e-06, "logits/chosen": -2.962700366973877, "logits/rejected": -2.986854076385498, "logps/chosen": -0.08826326578855515, "logps/rejected": -280.44140625, "loss": 0.0617, "rewards/accuracies": 1.0, "rewards/chosen": 0.3185242712497711, "rewards/margins": 2.7656259536743164, "rewards/rejected": -2.4471020698547363, "step": 3580 }, { "epoch": 0.14, "learning_rate": 4.9711025665010335e-06, "logits/chosen": -2.952615737915039, "logits/rejected": -2.9779627323150635, "logps/chosen": -3.4568467140197754, "logps/rejected": -275.4110412597656, "loss": 0.0966, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28376564383506775, "rewards/margins": 2.678913116455078, "rewards/rejected": -2.3951475620269775, "step": 3590 }, { "epoch": 0.14, "learning_rate": 4.970570953616383e-06, "logits/chosen": -2.979170560836792, "logits/rejected": -2.9991633892059326, "logps/chosen": -0.15985316038131714, "logps/rejected": -279.3775634765625, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": 0.31619539856910706, "rewards/margins": 2.7533907890319824, "rewards/rejected": -2.4371955394744873, "step": 3600 }, { "epoch": 0.14, "eval_logits/chosen": -3.0219814777374268, "eval_logits/rejected": -3.042097330093384, "eval_logps/chosen": -0.042605459690093994, "eval_logps/rejected": -269.3163146972656, "eval_loss": 0.0716976746916771, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3161734640598297, "eval_rewards/margins": 2.6491808891296387, "eval_rewards/rejected": -2.333007335662842, "eval_runtime": 2.5395, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 3600 }, { "epoch": 0.14, "learning_rate": 4.970034524227239e-06, "logits/chosen": -2.9530763626098633, "logits/rejected": -2.9759879112243652, "logps/chosen": -2.732703685760498, "logps/rejected": -274.076171875, "loss": 0.0901, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28760504722595215, "rewards/margins": 2.679018497467041, "rewards/rejected": -2.3914132118225098, "step": 3610 }, { "epoch": 0.14, "learning_rate": 4.969493279379397e-06, "logits/chosen": -2.9773521423339844, "logits/rejected": -3.000641345977783, "logps/chosen": -0.7091792821884155, "logps/rejected": -275.4801025390625, "loss": 0.0739, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3089652955532074, "rewards/margins": 2.710944414138794, "rewards/rejected": -2.4019789695739746, "step": 3620 }, { "epoch": 0.15, "learning_rate": 4.968947220128046e-06, "logits/chosen": -2.9741549491882324, "logits/rejected": -2.9935221672058105, "logps/chosen": -7.172226905822754, "logps/rejected": -271.99688720703125, "loss": 0.1341, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24631810188293457, "rewards/margins": 2.611926317214966, "rewards/rejected": -2.3656086921691895, "step": 3630 }, { "epoch": 0.15, "learning_rate": 4.968396347537751e-06, "logits/chosen": -2.9547410011291504, "logits/rejected": -2.9776289463043213, "logps/chosen": -3.612088441848755, "logps/rejected": -277.2936706542969, "loss": 0.0964, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27866607904434204, "rewards/margins": 2.704230785369873, "rewards/rejected": -2.425565004348755, "step": 3640 }, { "epoch": 0.15, "learning_rate": 4.96784066268247e-06, "logits/chosen": -2.982178211212158, "logits/rejected": -3.004009246826172, "logps/chosen": -0.3417501747608185, "logps/rejected": -275.1251220703125, "loss": 0.0718, "rewards/accuracies": 1.0, "rewards/chosen": 0.3155524730682373, "rewards/margins": 2.7123446464538574, "rewards/rejected": -2.396791934967041, "step": 3650 }, { "epoch": 0.15, "learning_rate": 4.967280166645538e-06, "logits/chosen": -2.9705708026885986, "logits/rejected": -2.9907729625701904, "logps/chosen": -3.5934367179870605, "logps/rejected": -277.73272705078125, "loss": 0.0965, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2803354859352112, "rewards/margins": 2.7042527198791504, "rewards/rejected": -2.423916816711426, "step": 3660 }, { "epoch": 0.15, "learning_rate": 4.96671486051967e-06, "logits/chosen": -2.966859817504883, "logits/rejected": -2.9867405891418457, "logps/chosen": -7.030834197998047, "logps/rejected": -275.7059326171875, "loss": 0.1296, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24594202637672424, "rewards/margins": 2.6481995582580566, "rewards/rejected": -2.4022574424743652, "step": 3670 }, { "epoch": 0.15, "learning_rate": 4.966144745406961e-06, "logits/chosen": -2.9793953895568848, "logits/rejected": -3.000272274017334, "logps/chosen": -1.7839170694351196, "logps/rejected": -277.2862548828125, "loss": 0.0737, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30053094029426575, "rewards/margins": 2.7186923027038574, "rewards/rejected": -2.418161153793335, "step": 3680 }, { "epoch": 0.15, "learning_rate": 4.965569822418878e-06, "logits/chosen": -2.9718847274780273, "logits/rejected": -2.99558687210083, "logps/chosen": -0.059700556099414825, "logps/rejected": -280.9217224121094, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": 0.31621870398521423, "rewards/margins": 2.7741570472717285, "rewards/rejected": -2.4579386711120605, "step": 3690 }, { "epoch": 0.15, "learning_rate": 4.964990092676263e-06, "logits/chosen": -2.974015951156616, "logits/rejected": -2.9943361282348633, "logps/chosen": -1.4893521070480347, "logps/rejected": -278.8644714355469, "loss": 0.0673, "rewards/accuracies": 1.0, "rewards/chosen": 0.3019491732120514, "rewards/margins": 2.7393369674682617, "rewards/rejected": -2.4373879432678223, "step": 3700 }, { "epoch": 0.15, "eval_logits/chosen": -3.0296058654785156, "eval_logits/rejected": -3.0412096977233887, "eval_logps/chosen": -7.572740077972412, "eval_logps/rejected": -237.04861450195312, "eval_loss": 0.18432767689228058, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.24087214469909668, "eval_rewards/margins": 2.2512028217315674, "eval_rewards/rejected": -2.0103304386138916, "eval_runtime": 2.5363, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 3700 }, { "epoch": 0.15, "learning_rate": 4.964405557309329e-06, "logits/chosen": -2.9959030151367188, "logits/rejected": -3.0159268379211426, "logps/chosen": -0.11601345241069794, "logps/rejected": -278.9722595214844, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": 0.3160446584224701, "rewards/margins": 2.7520668506622314, "rewards/rejected": -2.4360222816467285, "step": 3710 }, { "epoch": 0.15, "learning_rate": 4.9638162174576575e-06, "logits/chosen": -2.987706422805786, "logits/rejected": -3.007023334503174, "logps/chosen": -9.267744064331055, "logps/rejected": -271.6952209472656, "loss": 0.1428, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22588232159614563, "rewards/margins": 2.588879346847534, "rewards/rejected": -2.36299729347229, "step": 3720 }, { "epoch": 0.15, "learning_rate": 4.963222074270197e-06, "logits/chosen": -2.944993495941162, "logits/rejected": -2.969804048538208, "logps/chosen": -3.6522457599639893, "logps/rejected": -279.9569091796875, "loss": 0.0953, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2830791771411896, "rewards/margins": 2.7244973182678223, "rewards/rejected": -2.441418170928955, "step": 3730 }, { "epoch": 0.15, "learning_rate": 4.9626231289052594e-06, "logits/chosen": -2.949824333190918, "logits/rejected": -2.973968029022217, "logps/chosen": -0.1538485288619995, "logps/rejected": -281.6356506347656, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": 0.3151856064796448, "rewards/margins": 2.7740440368652344, "rewards/rejected": -2.4588587284088135, "step": 3740 }, { "epoch": 0.15, "learning_rate": 4.962019382530521e-06, "logits/chosen": -2.9592864513397217, "logits/rejected": -2.9834704399108887, "logps/chosen": -0.3953624367713928, "logps/rejected": -278.82318115234375, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": 0.31249916553497314, "rewards/margins": 2.7455458641052246, "rewards/rejected": -2.433046817779541, "step": 3750 }, { "epoch": 0.15, "learning_rate": 4.961410836323014e-06, "logits/chosen": -2.967629909515381, "logits/rejected": -2.9907071590423584, "logps/chosen": -6.847254753112793, "logps/rejected": -271.585205078125, "loss": 0.1309, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24693123996257782, "rewards/margins": 2.613346815109253, "rewards/rejected": -2.366415500640869, "step": 3760 }, { "epoch": 0.15, "learning_rate": 4.960797491469131e-06, "logits/chosen": -2.992189407348633, "logits/rejected": -3.0113954544067383, "logps/chosen": -0.08807673305273056, "logps/rejected": -281.0797424316406, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153635561466217, "rewards/margins": 2.770308256149292, "rewards/rejected": -2.454944610595703, "step": 3770 }, { "epoch": 0.15, "learning_rate": 4.960179349164621e-06, "logits/chosen": -2.9755611419677734, "logits/rejected": -2.995709180831909, "logps/chosen": -0.12059106677770615, "logps/rejected": -283.641357421875, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": 0.31640735268592834, "rewards/margins": 2.7947070598602295, "rewards/rejected": -2.478299617767334, "step": 3780 }, { "epoch": 0.15, "learning_rate": 4.9595564106145825e-06, "logits/chosen": -2.9755005836486816, "logits/rejected": -2.9942920207977295, "logps/chosen": -3.6898529529571533, "logps/rejected": -276.1412048339844, "loss": 0.0978, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28061842918395996, "rewards/margins": 2.6844253540039062, "rewards/rejected": -2.4038071632385254, "step": 3790 }, { "epoch": 0.15, "learning_rate": 4.958928677033465e-06, "logits/chosen": -2.992516040802002, "logits/rejected": -3.011733293533325, "logps/chosen": -7.218579292297363, "logps/rejected": -275.41485595703125, "loss": 0.1292, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24417981505393982, "rewards/margins": 2.644908905029297, "rewards/rejected": -2.400728940963745, "step": 3800 }, { "epoch": 0.15, "eval_logits/chosen": -3.027773857116699, "eval_logits/rejected": -3.049705743789673, "eval_logps/chosen": -0.04979877918958664, "eval_logps/rejected": -268.47357177734375, "eval_loss": 0.07230284810066223, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3161015212535858, "eval_rewards/margins": 2.640681743621826, "eval_rewards/rejected": -2.324579954147339, "eval_runtime": 2.5407, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 3800 }, { "epoch": 0.15, "learning_rate": 4.95829614964507e-06, "logits/chosen": -2.9703826904296875, "logits/rejected": -2.993757486343384, "logps/chosen": -0.12069046497344971, "logps/rejected": -278.62896728515625, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": 0.31698933243751526, "rewards/margins": 2.747802257537842, "rewards/rejected": -2.4308125972747803, "step": 3810 }, { "epoch": 0.15, "learning_rate": 4.957658829682539e-06, "logits/chosen": -2.972675323486328, "logits/rejected": -2.9975485801696777, "logps/chosen": -3.610311985015869, "logps/rejected": -276.81402587890625, "loss": 0.097, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2830526828765869, "rewards/margins": 2.6958518028259277, "rewards/rejected": -2.412799119949341, "step": 3820 }, { "epoch": 0.15, "learning_rate": 4.957016718388362e-06, "logits/chosen": -3.000523090362549, "logits/rejected": -3.0234885215759277, "logps/chosen": -0.09370069950819016, "logps/rejected": -280.70892333984375, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": 0.31779709458351135, "rewards/margins": 2.766321897506714, "rewards/rejected": -2.4485249519348145, "step": 3830 }, { "epoch": 0.15, "learning_rate": 4.956369817014367e-06, "logits/chosen": -2.9937338829040527, "logits/rejected": -3.015089511871338, "logps/chosen": -2.8786706924438477, "logps/rejected": -279.58807373046875, "loss": 0.0885, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28899046778678894, "rewards/margins": 2.728053569793701, "rewards/rejected": -2.4390625953674316, "step": 3840 }, { "epoch": 0.15, "learning_rate": 4.9557181268217225e-06, "logits/chosen": -2.9868767261505127, "logits/rejected": -3.009284496307373, "logps/chosen": -2.756619453430176, "logps/rejected": -277.61614990234375, "loss": 0.0884, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.287952721118927, "rewards/margins": 2.7128357887268066, "rewards/rejected": -2.4248833656311035, "step": 3850 }, { "epoch": 0.15, "learning_rate": 4.95506164908093e-06, "logits/chosen": -2.9750800132751465, "logits/rejected": -2.9986276626586914, "logps/chosen": -0.07864616811275482, "logps/rejected": -285.5621032714844, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150957226753235, "rewards/margins": 2.8163857460021973, "rewards/rejected": -2.5012898445129395, "step": 3860 }, { "epoch": 0.15, "learning_rate": 4.954400385071827e-06, "logits/chosen": -3.002523183822632, "logits/rejected": -3.021223783493042, "logps/chosen": -3.3871848583221436, "logps/rejected": -277.06109619140625, "loss": 0.0955, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.285531222820282, "rewards/margins": 2.6962027549743652, "rewards/rejected": -2.4106714725494385, "step": 3870 }, { "epoch": 0.16, "learning_rate": 4.953734336083582e-06, "logits/chosen": -2.984314441680908, "logits/rejected": -3.0032243728637695, "logps/chosen": -10.328107833862305, "logps/rejected": -270.49786376953125, "loss": 0.1451, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21326282620429993, "rewards/margins": 2.5640006065368652, "rewards/rejected": -2.3507373332977295, "step": 3880 }, { "epoch": 0.16, "learning_rate": 4.953063503414692e-06, "logits/chosen": -2.9902472496032715, "logits/rejected": -3.0126187801361084, "logps/chosen": -2.5700478553771973, "logps/rejected": -278.4434814453125, "loss": 0.0726, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2908267676830292, "rewards/margins": 2.7235207557678223, "rewards/rejected": -2.4326939582824707, "step": 3890 }, { "epoch": 0.16, "learning_rate": 4.9523878883729794e-06, "logits/chosen": -2.995908260345459, "logits/rejected": -3.018131971359253, "logps/chosen": -1.8335380554199219, "logps/rejected": -278.60491943359375, "loss": 0.0683, "rewards/accuracies": 1.0, "rewards/chosen": 0.29932379722595215, "rewards/margins": 2.7256722450256348, "rewards/rejected": -2.4263486862182617, "step": 3900 }, { "epoch": 0.16, "eval_logits/chosen": -3.0320723056793213, "eval_logits/rejected": -3.0506036281585693, "eval_logps/chosen": -0.045135773718357086, "eval_logps/rejected": -271.6014099121094, "eval_loss": 0.06987325102090836, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31614816188812256, "eval_rewards/margins": 2.672006607055664, "eval_rewards/rejected": -2.355858325958252, "eval_runtime": 2.5436, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 3900 }, { "epoch": 0.16, "learning_rate": 4.951707492275589e-06, "logits/chosen": -2.9794821739196777, "logits/rejected": -3.000650405883789, "logps/chosen": -5.881481647491455, "logps/rejected": -275.35491943359375, "loss": 0.1014, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2600100040435791, "rewards/margins": 2.661472797393799, "rewards/rejected": -2.4014623165130615, "step": 3910 }, { "epoch": 0.16, "learning_rate": 4.95102231644899e-06, "logits/chosen": -2.9762749671936035, "logits/rejected": -2.9988045692443848, "logps/chosen": -6.32058572769165, "logps/rejected": -268.8612060546875, "loss": 0.1315, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25599247217178345, "rewards/margins": 2.587488889694214, "rewards/rejected": -2.331496000289917, "step": 3920 }, { "epoch": 0.16, "learning_rate": 4.950332362228966e-06, "logits/chosen": -2.9889419078826904, "logits/rejected": -3.0110890865325928, "logps/chosen": -0.1461101621389389, "logps/rejected": -283.7841796875, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": 0.31754156947135925, "rewards/margins": 2.8022778034210205, "rewards/rejected": -2.4847359657287598, "step": 3930 }, { "epoch": 0.16, "learning_rate": 4.949637630960618e-06, "logits/chosen": -2.98836612701416, "logits/rejected": -3.010911226272583, "logps/chosen": -3.6333396434783936, "logps/rejected": -275.56121826171875, "loss": 0.0959, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27835220098495483, "rewards/margins": 2.6822876930236816, "rewards/rejected": -2.403935432434082, "step": 3940 }, { "epoch": 0.16, "learning_rate": 4.94893812399836e-06, "logits/chosen": -2.9632179737091064, "logits/rejected": -2.9858155250549316, "logps/chosen": -0.1617555022239685, "logps/rejected": -280.2940979003906, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148265480995178, "rewards/margins": 2.7670645713806152, "rewards/rejected": -2.452238082885742, "step": 3950 }, { "epoch": 0.16, "learning_rate": 4.948233842705919e-06, "logits/chosen": -2.9652633666992188, "logits/rejected": -2.989370107650757, "logps/chosen": -2.8416929244995117, "logps/rejected": -276.65887451171875, "loss": 0.0908, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2866603434085846, "rewards/margins": 2.6980032920837402, "rewards/rejected": -2.4113430976867676, "step": 3960 }, { "epoch": 0.16, "learning_rate": 4.947524788456325e-06, "logits/chosen": -2.981260061264038, "logits/rejected": -3.004488468170166, "logps/chosen": -3.73335599899292, "logps/rejected": -280.02960205078125, "loss": 0.0957, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2791144847869873, "rewards/margins": 2.7232155799865723, "rewards/rejected": -2.444100856781006, "step": 3970 }, { "epoch": 0.16, "learning_rate": 4.946810962631916e-06, "logits/chosen": -2.9860329627990723, "logits/rejected": -3.008103132247925, "logps/chosen": -8.107025146484375, "logps/rejected": -272.87957763671875, "loss": 0.1372, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23653869330883026, "rewards/margins": 2.610607624053955, "rewards/rejected": -2.3740692138671875, "step": 3980 }, { "epoch": 0.16, "learning_rate": 4.946092366624333e-06, "logits/chosen": -2.9852020740509033, "logits/rejected": -3.0070900917053223, "logps/chosen": -0.10410264879465103, "logps/rejected": -282.15496826171875, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": 0.31658345460891724, "rewards/margins": 2.779818058013916, "rewards/rejected": -2.4632344245910645, "step": 3990 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -2.9819889068603516, "logits/rejected": -3.005112648010254, "logps/chosen": -8.758352279663086, "logps/rejected": -272.49932861328125, "loss": 0.1472, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2308562695980072, "rewards/margins": 2.5976805686950684, "rewards/rejected": -2.366824150085449, "step": 4000 }, { "epoch": 0.16, "eval_logits/chosen": -3.0306925773620605, "eval_logits/rejected": -3.0525200366973877, "eval_logps/chosen": -0.04542647302150726, "eval_logps/rejected": -271.69378662109375, "eval_loss": 0.06974434107542038, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31614527106285095, "eval_rewards/margins": 2.6729273796081543, "eval_rewards/rejected": -2.3567824363708496, "eval_runtime": 2.5364, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 4000 }, { "epoch": 0.16, "learning_rate": 4.9446408696726974e-06, "logits/chosen": -2.9675230979919434, "logits/rejected": -2.989609718322754, "logps/chosen": -4.51370906829834, "logps/rejected": -278.5287170410156, "loss": 0.0827, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2719292938709259, "rewards/margins": 2.7012577056884766, "rewards/rejected": -2.429328441619873, "step": 4010 }, { "epoch": 0.16, "learning_rate": 4.943907971558414e-06, "logits/chosen": -2.98136830329895, "logits/rejected": -3.005876302719116, "logps/chosen": -2.251939535140991, "logps/rejected": -280.66668701171875, "loss": 0.0695, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29281407594680786, "rewards/margins": 2.7459683418273926, "rewards/rejected": -2.4531540870666504, "step": 4020 }, { "epoch": 0.16, "learning_rate": 4.943170308920484e-06, "logits/chosen": -2.9719767570495605, "logits/rejected": -2.9950947761535645, "logps/chosen": -5.9595842361450195, "logps/rejected": -267.0484924316406, "loss": 0.1203, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2569754719734192, "rewards/margins": 2.574775218963623, "rewards/rejected": -2.3178000450134277, "step": 4030 }, { "epoch": 0.16, "learning_rate": 4.942427883197021e-06, "logits/chosen": -2.9850752353668213, "logits/rejected": -3.0099005699157715, "logps/chosen": -3.671910524368286, "logps/rejected": -276.7843933105469, "loss": 0.0991, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27949753403663635, "rewards/margins": 2.693117141723633, "rewards/rejected": -2.41361927986145, "step": 4040 }, { "epoch": 0.16, "learning_rate": 4.9416806958354206e-06, "logits/chosen": -2.9834470748901367, "logits/rejected": -3.0078346729278564, "logps/chosen": -1.5956265926361084, "logps/rejected": -276.3949279785156, "loss": 0.0737, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30373793840408325, "rewards/margins": 2.710355043411255, "rewards/rejected": -2.4066174030303955, "step": 4050 }, { "epoch": 0.16, "learning_rate": 4.940928748292363e-06, "logits/chosen": -3.008810520172119, "logits/rejected": -3.0313549041748047, "logps/chosen": -4.721566200256348, "logps/rejected": -272.6415100097656, "loss": 0.1084, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27185115218162537, "rewards/margins": 2.6411890983581543, "rewards/rejected": -2.369337797164917, "step": 4060 }, { "epoch": 0.16, "learning_rate": 4.940172042033808e-06, "logits/chosen": -3.006065845489502, "logits/rejected": -3.0285518169403076, "logps/chosen": -0.11305595934391022, "logps/rejected": -282.0202941894531, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": 0.31646549701690674, "rewards/margins": 2.7767221927642822, "rewards/rejected": -2.460256814956665, "step": 4070 }, { "epoch": 0.16, "learning_rate": 4.939410578534994e-06, "logits/chosen": -2.9962716102600098, "logits/rejected": -3.019587755203247, "logps/chosen": -0.14845162630081177, "logps/rejected": -283.7741394042969, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": 0.31613507866859436, "rewards/margins": 2.7978391647338867, "rewards/rejected": -2.481703758239746, "step": 4080 }, { "epoch": 0.16, "learning_rate": 4.938644359280433e-06, "logits/chosen": -2.990828037261963, "logits/rejected": -3.0128607749938965, "logps/chosen": -5.401320934295654, "logps/rejected": -280.354248046875, "loss": 0.0984, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2651645243167877, "rewards/margins": 2.710826873779297, "rewards/rejected": -2.445662498474121, "step": 4090 }, { "epoch": 0.16, "learning_rate": 4.937873385763909e-06, "logits/chosen": -2.9644405841827393, "logits/rejected": -2.987145185470581, "logps/chosen": -5.781490325927734, "logps/rejected": -278.20159912109375, "loss": 0.104, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2575196921825409, "rewards/margins": 2.6888415813446045, "rewards/rejected": -2.431321859359741, "step": 4100 }, { "epoch": 0.16, "eval_logits/chosen": -3.0360982418060303, "eval_logits/rejected": -3.055572271347046, "eval_logps/chosen": -14.524850845336914, "eval_logps/rejected": -269.620849609375, "eval_loss": 0.09769073873758316, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.17135103046894073, "eval_rewards/margins": 2.50740385055542, "eval_rewards/rejected": -2.336052656173706, "eval_runtime": 2.5395, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 4100 }, { "epoch": 0.16, "learning_rate": 4.937097659488473e-06, "logits/chosen": -2.9930789470672607, "logits/rejected": -3.0150017738342285, "logps/chosen": -3.5257439613342285, "logps/rejected": -280.61370849609375, "loss": 0.0943, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2807757556438446, "rewards/margins": 2.731001377105713, "rewards/rejected": -2.450225830078125, "step": 4110 }, { "epoch": 0.16, "learning_rate": 4.9363171819664434e-06, "logits/chosen": -2.9774835109710693, "logits/rejected": -2.998265266418457, "logps/chosen": -10.810728073120117, "logps/rejected": -267.61688232421875, "loss": 0.168, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2063523232936859, "rewards/margins": 2.5317764282226562, "rewards/rejected": -2.3254239559173584, "step": 4120 }, { "epoch": 0.17, "learning_rate": 4.935531954719401e-06, "logits/chosen": -3.0078060626983643, "logits/rejected": -3.027414321899414, "logps/chosen": -0.12381377071142197, "logps/rejected": -285.20477294921875, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": 0.318420946598053, "rewards/margins": 2.810399055480957, "rewards/rejected": -2.491978168487549, "step": 4130 }, { "epoch": 0.17, "learning_rate": 4.934741979278188e-06, "logits/chosen": -2.9871599674224854, "logits/rejected": -3.0065948963165283, "logps/chosen": -2.280360460281372, "logps/rejected": -281.41400146484375, "loss": 0.0708, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2938382029533386, "rewards/margins": 2.7521958351135254, "rewards/rejected": -2.458357810974121, "step": 4140 }, { "epoch": 0.17, "learning_rate": 4.933947257182901e-06, "logits/chosen": -2.994497299194336, "logits/rejected": -3.0131826400756836, "logps/chosen": -1.194793701171875, "logps/rejected": -283.5026550292969, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": 0.3069208562374115, "rewards/margins": 2.784928560256958, "rewards/rejected": -2.4780070781707764, "step": 4150 }, { "epoch": 0.17, "learning_rate": 4.933147789982891e-06, "logits/chosen": -3.007347583770752, "logits/rejected": -3.029618740081787, "logps/chosen": -6.041491985321045, "logps/rejected": -273.52850341796875, "loss": 0.1226, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25814104080200195, "rewards/margins": 2.637150764465332, "rewards/rejected": -2.37900972366333, "step": 4160 }, { "epoch": 0.17, "learning_rate": 4.93234357923676e-06, "logits/chosen": -2.9873135089874268, "logits/rejected": -3.009258270263672, "logps/chosen": -5.967925548553467, "logps/rejected": -276.02484130859375, "loss": 0.12, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2577475905418396, "rewards/margins": 2.662808895111084, "rewards/rejected": -2.4050614833831787, "step": 4170 }, { "epoch": 0.17, "learning_rate": 4.931534626512359e-06, "logits/chosen": -2.970879554748535, "logits/rejected": -2.9958810806274414, "logps/chosen": -4.349549293518066, "logps/rejected": -276.65350341796875, "loss": 0.1047, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2722077965736389, "rewards/margins": 2.6848084926605225, "rewards/rejected": -2.4126007556915283, "step": 4180 }, { "epoch": 0.17, "learning_rate": 4.930720933386782e-06, "logits/chosen": -2.9964969158172607, "logits/rejected": -3.020843744277954, "logps/chosen": -1.152702808380127, "logps/rejected": -272.22271728515625, "loss": 0.0826, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30379796028137207, "rewards/margins": 2.675860643386841, "rewards/rejected": -2.3720626831054688, "step": 4190 }, { "epoch": 0.17, "learning_rate": 4.9299025014463665e-06, "logits/chosen": -2.960857391357422, "logits/rejected": -2.9886131286621094, "logps/chosen": -3.976670026779175, "logps/rejected": -277.08013916015625, "loss": 0.103, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2779232859611511, "rewards/margins": 2.697679281234741, "rewards/rejected": -2.4197564125061035, "step": 4200 }, { "epoch": 0.17, "eval_logits/chosen": -3.0347747802734375, "eval_logits/rejected": -3.0572149753570557, "eval_logps/chosen": -0.065556600689888, "eval_logps/rejected": -264.3194274902344, "eval_loss": 0.08023140579462051, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31594395637512207, "eval_rewards/margins": 2.598982572555542, "eval_rewards/rejected": -2.28303861618042, "eval_runtime": 2.5408, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 4200 }, { "epoch": 0.17, "learning_rate": 4.929079332286685e-06, "logits/chosen": -3.001821994781494, "logits/rejected": -3.025146961212158, "logps/chosen": -3.40997314453125, "logps/rejected": -281.3956604003906, "loss": 0.092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2849999666213989, "rewards/margins": 2.7431674003601074, "rewards/rejected": -2.4581680297851562, "step": 4210 }, { "epoch": 0.17, "learning_rate": 4.928251427512551e-06, "logits/chosen": -2.9911813735961914, "logits/rejected": -3.0148708820343018, "logps/chosen": -10.267712593078613, "logps/rejected": -267.4298095703125, "loss": 0.1677, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2133229672908783, "rewards/margins": 2.5377254486083984, "rewards/rejected": -2.324402332305908, "step": 4220 }, { "epoch": 0.17, "learning_rate": 4.927418788738004e-06, "logits/chosen": -2.990753650665283, "logits/rejected": -3.017868757247925, "logps/chosen": -3.5150704383850098, "logps/rejected": -280.1457214355469, "loss": 0.0906, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.280912309885025, "rewards/margins": 2.7269985675811768, "rewards/rejected": -2.4460864067077637, "step": 4230 }, { "epoch": 0.17, "learning_rate": 4.926581417586319e-06, "logits/chosen": -2.9836466312408447, "logits/rejected": -3.007497787475586, "logps/chosen": -3.903517961502075, "logps/rejected": -278.66339111328125, "loss": 0.0971, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27795934677124023, "rewards/margins": 2.7078988552093506, "rewards/rejected": -2.4299397468566895, "step": 4240 }, { "epoch": 0.17, "learning_rate": 4.925739315689991e-06, "logits/chosen": -2.9737250804901123, "logits/rejected": -2.997143030166626, "logps/chosen": -0.7649596929550171, "logps/rejected": -278.9692687988281, "loss": 0.0707, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3079896569252014, "rewards/margins": 2.7443230152130127, "rewards/rejected": -2.436333417892456, "step": 4250 }, { "epoch": 0.17, "learning_rate": 4.924892484690744e-06, "logits/chosen": -2.9573745727539062, "logits/rejected": -2.983161687850952, "logps/chosen": -0.08482742309570312, "logps/rejected": -281.97296142578125, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": 0.31810545921325684, "rewards/margins": 2.777606964111328, "rewards/rejected": -2.4595017433166504, "step": 4260 }, { "epoch": 0.17, "learning_rate": 4.924040926239515e-06, "logits/chosen": -2.9941678047180176, "logits/rejected": -3.0166568756103516, "logps/chosen": -5.077086448669434, "logps/rejected": -273.32696533203125, "loss": 0.1119, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2652580142021179, "rewards/margins": 2.6437580585479736, "rewards/rejected": -2.378499746322632, "step": 4270 }, { "epoch": 0.17, "learning_rate": 4.923184641996463e-06, "logits/chosen": -2.9775445461273193, "logits/rejected": -2.999530553817749, "logps/chosen": -4.1812357902526855, "logps/rejected": -276.0430908203125, "loss": 0.1029, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2740359306335449, "rewards/margins": 2.6814522743225098, "rewards/rejected": -2.407416820526123, "step": 4280 }, { "epoch": 0.17, "learning_rate": 4.922323633630957e-06, "logits/chosen": -3.0078136920928955, "logits/rejected": -3.029372453689575, "logps/chosen": -1.1529546976089478, "logps/rejected": -276.13970947265625, "loss": 0.0739, "rewards/accuracies": 1.0, "rewards/chosen": 0.303530216217041, "rewards/margins": 2.708925247192383, "rewards/rejected": -2.405395030975342, "step": 4290 }, { "epoch": 0.17, "learning_rate": 4.921457902821578e-06, "logits/chosen": -2.9911365509033203, "logits/rejected": -3.0156967639923096, "logps/chosen": -3.698829174041748, "logps/rejected": -282.1670227050781, "loss": 0.0939, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2838190793991089, "rewards/margins": 2.7502548694610596, "rewards/rejected": -2.4664359092712402, "step": 4300 }, { "epoch": 0.17, "eval_logits/chosen": -3.0352158546447754, "eval_logits/rejected": -3.05765438079834, "eval_logps/chosen": -0.04915366321802139, "eval_logps/rejected": -271.68511962890625, "eval_loss": 0.07048001140356064, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31610798835754395, "eval_rewards/margins": 2.6728038787841797, "eval_rewards/rejected": -2.3566956520080566, "eval_runtime": 2.5426, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 4300 }, { "epoch": 0.17, "learning_rate": 4.920587451256112e-06, "logits/chosen": -3.0039775371551514, "logits/rejected": -3.0264999866485596, "logps/chosen": -5.998003005981445, "logps/rejected": -275.0284118652344, "loss": 0.121, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2552075982093811, "rewards/margins": 2.6534574031829834, "rewards/rejected": -2.398249864578247, "step": 4310 }, { "epoch": 0.17, "learning_rate": 4.919712280631547e-06, "logits/chosen": -2.991525650024414, "logits/rejected": -3.0137996673583984, "logps/chosen": -0.08088923245668411, "logps/rejected": -283.62054443359375, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152411878108978, "rewards/margins": 2.799522638320923, "rewards/rejected": -2.484281539916992, "step": 4320 }, { "epoch": 0.17, "learning_rate": 4.918832392654075e-06, "logits/chosen": -3.012331485748291, "logits/rejected": -3.034796953201294, "logps/chosen": -3.651451826095581, "logps/rejected": -279.5082702636719, "loss": 0.0958, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2853643298149109, "rewards/margins": 2.7241337299346924, "rewards/rejected": -2.4387691020965576, "step": 4330 }, { "epoch": 0.17, "learning_rate": 4.9179477890390825e-06, "logits/chosen": -2.979299545288086, "logits/rejected": -3.002322196960449, "logps/chosen": -2.288792133331299, "logps/rejected": -281.71734619140625, "loss": 0.0686, "rewards/accuracies": 1.0, "rewards/chosen": 0.2919761538505554, "rewards/margins": 2.7555465698242188, "rewards/rejected": -2.4635708332061768, "step": 4340 }, { "epoch": 0.17, "learning_rate": 4.917058471511149e-06, "logits/chosen": -2.9899723529815674, "logits/rejected": -3.0142300128936768, "logps/chosen": -8.69981861114502, "logps/rejected": -273.12567138671875, "loss": 0.1466, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23039576411247253, "rewards/margins": 2.609924793243408, "rewards/rejected": -2.3795289993286133, "step": 4350 }, { "epoch": 0.17, "learning_rate": 4.916164441804044e-06, "logits/chosen": -2.972620964050293, "logits/rejected": -2.9985337257385254, "logps/chosen": -2.4511449337005615, "logps/rejected": -275.63311767578125, "loss": 0.0876, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29330989718437195, "rewards/margins": 2.694218635559082, "rewards/rejected": -2.4009087085723877, "step": 4360 }, { "epoch": 0.17, "learning_rate": 4.915265701660726e-06, "logits/chosen": -2.9857442378997803, "logits/rejected": -3.004915475845337, "logps/chosen": -5.930447578430176, "logps/rejected": -277.4071044921875, "loss": 0.1183, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25754064321517944, "rewards/margins": 2.6772637367248535, "rewards/rejected": -2.4197230339050293, "step": 4370 }, { "epoch": 0.18, "learning_rate": 4.914362252833332e-06, "logits/chosen": -3.0059263706207275, "logits/rejected": -3.0300426483154297, "logps/chosen": -7.20613956451416, "logps/rejected": -278.37200927734375, "loss": 0.1292, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2445591390132904, "rewards/margins": 2.6769416332244873, "rewards/rejected": -2.432382345199585, "step": 4380 }, { "epoch": 0.18, "learning_rate": 4.913454097083185e-06, "logits/chosen": -2.988520622253418, "logits/rejected": -3.014064311981201, "logps/chosen": -0.3004329800605774, "logps/rejected": -279.99322509765625, "loss": 0.0631, "rewards/accuracies": 1.0, "rewards/chosen": 0.31510981917381287, "rewards/margins": 2.7579398155212402, "rewards/rejected": -2.4428305625915527, "step": 4390 }, { "epoch": 0.18, "learning_rate": 4.912541236180779e-06, "logits/chosen": -2.982120990753174, "logits/rejected": -3.0076794624328613, "logps/chosen": -3.663050413131714, "logps/rejected": -276.02667236328125, "loss": 0.1012, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27947360277175903, "rewards/margins": 2.6872124671936035, "rewards/rejected": -2.40773868560791, "step": 4400 }, { "epoch": 0.18, "eval_logits/chosen": -3.0388031005859375, "eval_logits/rejected": -3.0618317127227783, "eval_logps/chosen": -0.05255429074168205, "eval_logps/rejected": -273.3020935058594, "eval_loss": 0.06874220818281174, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.316074013710022, "eval_rewards/margins": 2.688939094543457, "eval_rewards/rejected": -2.3728652000427246, "eval_runtime": 2.5363, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 4400 }, { "epoch": 0.18, "learning_rate": 4.911623671905784e-06, "logits/chosen": -2.944742441177368, "logits/rejected": -2.9719698429107666, "logps/chosen": -1.7331855297088623, "logps/rejected": -281.08526611328125, "loss": 0.0757, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30119362473487854, "rewards/margins": 2.7548928260803223, "rewards/rejected": -2.4536991119384766, "step": 4410 }, { "epoch": 0.18, "learning_rate": 4.910701406047037e-06, "logits/chosen": -3.000967502593994, "logits/rejected": -3.0256519317626953, "logps/chosen": -0.14429742097854614, "logps/rejected": -280.50982666015625, "loss": 0.0633, "rewards/accuracies": 1.0, "rewards/chosen": 0.3173660337924957, "rewards/margins": 2.7619094848632812, "rewards/rejected": -2.4445431232452393, "step": 4420 }, { "epoch": 0.18, "learning_rate": 4.9097744404025435e-06, "logits/chosen": -2.9809985160827637, "logits/rejected": -3.001534938812256, "logps/chosen": -3.514406204223633, "logps/rejected": -275.1272277832031, "loss": 0.098, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27890485525131226, "rewards/margins": 2.6789252758026123, "rewards/rejected": -2.4000203609466553, "step": 4430 }, { "epoch": 0.18, "learning_rate": 4.908842776779472e-06, "logits/chosen": -2.9804189205169678, "logits/rejected": -3.003782033920288, "logps/chosen": -3.6422533988952637, "logps/rejected": -277.69317626953125, "loss": 0.0968, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27874499559402466, "rewards/margins": 2.706242799758911, "rewards/rejected": -2.427497625350952, "step": 4440 }, { "epoch": 0.18, "learning_rate": 4.907906416994146e-06, "logits/chosen": -2.982748508453369, "logits/rejected": -3.007615566253662, "logps/chosen": -0.4491070806980133, "logps/rejected": -278.43121337890625, "loss": 0.0659, "rewards/accuracies": 1.0, "rewards/chosen": 0.3135746419429779, "rewards/margins": 2.739652156829834, "rewards/rejected": -2.4260776042938232, "step": 4450 }, { "epoch": 0.18, "learning_rate": 4.906965362872048e-06, "logits/chosen": -2.9515037536621094, "logits/rejected": -2.9763808250427246, "logps/chosen": -8.938207626342773, "logps/rejected": -275.2086181640625, "loss": 0.1362, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.22682595252990723, "rewards/margins": 2.625018358230591, "rewards/rejected": -2.3981924057006836, "step": 4460 }, { "epoch": 0.18, "learning_rate": 4.90601961624781e-06, "logits/chosen": -2.985548734664917, "logits/rejected": -3.008352041244507, "logps/chosen": -2.627971887588501, "logps/rejected": -278.2849426269531, "loss": 0.0841, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29335495829582214, "rewards/margins": 2.7125024795532227, "rewards/rejected": -2.419147491455078, "step": 4470 }, { "epoch": 0.18, "learning_rate": 4.905069178965215e-06, "logits/chosen": -2.9803738594055176, "logits/rejected": -3.0033059120178223, "logps/chosen": -2.482811689376831, "logps/rejected": -278.851806640625, "loss": 0.0854, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2920432388782501, "rewards/margins": 2.7258405685424805, "rewards/rejected": -2.4337971210479736, "step": 4480 }, { "epoch": 0.18, "learning_rate": 4.904114052877189e-06, "logits/chosen": -2.9826438426971436, "logits/rejected": -3.0044639110565186, "logps/chosen": -4.127838134765625, "logps/rejected": -273.6899719238281, "loss": 0.1021, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2744656205177307, "rewards/margins": 2.6585028171539307, "rewards/rejected": -2.384037494659424, "step": 4490 }, { "epoch": 0.18, "learning_rate": 4.903154239845798e-06, "logits/chosen": -2.9969704151153564, "logits/rejected": -3.0216715335845947, "logps/chosen": -4.0495500564575195, "logps/rejected": -283.6988525390625, "loss": 0.0928, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2765844762325287, "rewards/margins": 2.7642874717712402, "rewards/rejected": -2.4877030849456787, "step": 4500 }, { "epoch": 0.18, "eval_logits/chosen": -3.0422379970550537, "eval_logits/rejected": -3.064002275466919, "eval_logps/chosen": -0.06640340387821198, "eval_logps/rejected": -274.23468017578125, "eval_loss": 0.06780683249235153, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31593549251556396, "eval_rewards/margins": 2.6981265544891357, "eval_rewards/rejected": -2.3821911811828613, "eval_runtime": 2.5406, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 4500 }, { "epoch": 0.18, "learning_rate": 4.902189741742247e-06, "logits/chosen": -3.0133230686187744, "logits/rejected": -3.034255266189575, "logps/chosen": -7.239530086517334, "logps/rejected": -274.60699462890625, "loss": 0.1326, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2435760498046875, "rewards/margins": 2.636695146560669, "rewards/rejected": -2.3931186199188232, "step": 4510 }, { "epoch": 0.18, "learning_rate": 4.901220560446875e-06, "logits/chosen": -3.0098090171813965, "logits/rejected": -3.033118724822998, "logps/chosen": -2.7310898303985596, "logps/rejected": -278.35089111328125, "loss": 0.0878, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29081740975379944, "rewards/margins": 2.721912145614624, "rewards/rejected": -2.4310948848724365, "step": 4520 }, { "epoch": 0.18, "learning_rate": 4.90024669784915e-06, "logits/chosen": -2.988487720489502, "logits/rejected": -3.011195182800293, "logps/chosen": -3.551788330078125, "logps/rejected": -281.0076599121094, "loss": 0.094, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28055211901664734, "rewards/margins": 2.73574161529541, "rewards/rejected": -2.4551899433135986, "step": 4530 }, { "epoch": 0.18, "learning_rate": 4.899268155847667e-06, "logits/chosen": -2.9622600078582764, "logits/rejected": -2.985560417175293, "logps/chosen": -0.2938328981399536, "logps/rejected": -281.53057861328125, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": 0.3128475844860077, "rewards/margins": 2.7748546600341797, "rewards/rejected": -2.4620070457458496, "step": 4540 }, { "epoch": 0.18, "learning_rate": 4.898284936350144e-06, "logits/chosen": -2.9653756618499756, "logits/rejected": -2.989915609359741, "logps/chosen": -0.8320473432540894, "logps/rejected": -277.2349853515625, "loss": 0.0662, "rewards/accuracies": 1.0, "rewards/chosen": 0.30463629961013794, "rewards/margins": 2.7275753021240234, "rewards/rejected": -2.4229390621185303, "step": 4550 }, { "epoch": 0.18, "learning_rate": 4.8972970412734174e-06, "logits/chosen": -2.981529712677002, "logits/rejected": -3.0042641162872314, "logps/chosen": -3.6343746185302734, "logps/rejected": -278.56842041015625, "loss": 0.0966, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.281321257352829, "rewards/margins": 2.7121989727020264, "rewards/rejected": -2.430877208709717, "step": 4560 }, { "epoch": 0.18, "learning_rate": 4.89630447254344e-06, "logits/chosen": -2.9801342487335205, "logits/rejected": -3.003708839416504, "logps/chosen": -0.7719541788101196, "logps/rejected": -282.6393127441406, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": 0.30780893564224243, "rewards/margins": 2.7808825969696045, "rewards/rejected": -2.473073720932007, "step": 4570 }, { "epoch": 0.18, "learning_rate": 4.895307232095275e-06, "logits/chosen": -3.006650447845459, "logits/rejected": -3.030689001083374, "logps/chosen": -11.427572250366211, "logps/rejected": -270.5079040527344, "loss": 0.1755, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2005116194486618, "rewards/margins": 2.5560355186462402, "rewards/rejected": -2.3555240631103516, "step": 4580 }, { "epoch": 0.18, "learning_rate": 4.894305321873092e-06, "logits/chosen": -2.9859251976013184, "logits/rejected": -3.013094902038574, "logps/chosen": -0.12001538276672363, "logps/rejected": -283.48370361328125, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": 0.3139466643333435, "rewards/margins": 2.797086238861084, "rewards/rejected": -2.4831395149230957, "step": 4590 }, { "epoch": 0.18, "learning_rate": 4.893298743830168e-06, "logits/chosen": -2.995144844055176, "logits/rejected": -3.019346237182617, "logps/chosen": -2.3526852130889893, "logps/rejected": -275.88751220703125, "loss": 0.092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29430514574050903, "rewards/margins": 2.696133852005005, "rewards/rejected": -2.4018287658691406, "step": 4600 }, { "epoch": 0.18, "eval_logits/chosen": -3.039649248123169, "eval_logits/rejected": -3.062656879425049, "eval_logps/chosen": -0.04716275632381439, "eval_logps/rejected": -273.8116149902344, "eval_loss": 0.0680815726518631, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3161279261112213, "eval_rewards/margins": 2.6940884590148926, "eval_rewards/rejected": -2.377960443496704, "eval_runtime": 2.5452, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.393, "step": 4600 }, { "epoch": 0.18, "learning_rate": 4.892287499928879e-06, "logits/chosen": -2.952566146850586, "logits/rejected": -2.975142002105713, "logps/chosen": -3.4622931480407715, "logps/rejected": -270.6771240234375, "loss": 0.1078, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2811293601989746, "rewards/margins": 2.6372270584106445, "rewards/rejected": -2.35609769821167, "step": 4610 }, { "epoch": 0.18, "learning_rate": 4.891271592140695e-06, "logits/chosen": -2.978227138519287, "logits/rejected": -3.0049753189086914, "logps/chosen": -5.08248233795166, "logps/rejected": -269.8345031738281, "loss": 0.1238, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.26546168327331543, "rewards/margins": 2.6124260425567627, "rewards/rejected": -2.3469643592834473, "step": 4620 }, { "epoch": 0.19, "learning_rate": 4.890251022446181e-06, "logits/chosen": -2.996875286102295, "logits/rejected": -3.021876335144043, "logps/chosen": -1.287255883216858, "logps/rejected": -279.2198181152344, "loss": 0.0777, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3058275878429413, "rewards/margins": 2.742849826812744, "rewards/rejected": -2.4370219707489014, "step": 4630 }, { "epoch": 0.19, "learning_rate": 4.889225792834991e-06, "logits/chosen": -2.9709343910217285, "logits/rejected": -2.9995503425598145, "logps/chosen": -0.1349790096282959, "logps/rejected": -281.5534973144531, "loss": 0.0658, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138509690761566, "rewards/margins": 2.77821683883667, "rewards/rejected": -2.4643654823303223, "step": 4640 }, { "epoch": 0.19, "learning_rate": 4.888195905305859e-06, "logits/chosen": -2.985363721847534, "logits/rejected": -3.011430501937866, "logps/chosen": -0.3180665075778961, "logps/rejected": -281.0392150878906, "loss": 0.0663, "rewards/accuracies": 1.0, "rewards/chosen": 0.31554514169692993, "rewards/margins": 2.7668635845184326, "rewards/rejected": -2.4513182640075684, "step": 4650 }, { "epoch": 0.19, "learning_rate": 4.887161361866608e-06, "logits/chosen": -2.9892563819885254, "logits/rejected": -3.0150394439697266, "logps/chosen": -2.850008010864258, "logps/rejected": -281.0337219238281, "loss": 0.0868, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28933706879615784, "rewards/margins": 2.7467386722564697, "rewards/rejected": -2.4574015140533447, "step": 4660 }, { "epoch": 0.19, "learning_rate": 4.8861221645341305e-06, "logits/chosen": -2.9736623764038086, "logits/rejected": -2.998011350631714, "logps/chosen": -8.769153594970703, "logps/rejected": -274.9989013671875, "loss": 0.1465, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23189444839954376, "rewards/margins": 2.6254897117614746, "rewards/rejected": -2.3935952186584473, "step": 4670 }, { "epoch": 0.19, "learning_rate": 4.885078315334395e-06, "logits/chosen": -2.996387004852295, "logits/rejected": -3.02105712890625, "logps/chosen": -10.883025169372559, "logps/rejected": -270.528564453125, "loss": 0.1694, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21006667613983154, "rewards/margins": 2.560932159423828, "rewards/rejected": -2.350865364074707, "step": 4680 }, { "epoch": 0.19, "learning_rate": 4.884029816302441e-06, "logits/chosen": -3.0012941360473633, "logits/rejected": -3.0264530181884766, "logps/chosen": -4.2471113204956055, "logps/rejected": -275.428466796875, "loss": 0.1067, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2758389711380005, "rewards/margins": 2.676461696624756, "rewards/rejected": -2.400622844696045, "step": 4690 }, { "epoch": 0.19, "learning_rate": 4.882976669482368e-06, "logits/chosen": -3.013587713241577, "logits/rejected": -3.036530017852783, "logps/chosen": -3.4292125701904297, "logps/rejected": -276.2977600097656, "loss": 0.0966, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28248825669288635, "rewards/margins": 2.691143035888672, "rewards/rejected": -2.4086549282073975, "step": 4700 }, { "epoch": 0.19, "eval_logits/chosen": -3.0393896102905273, "eval_logits/rejected": -3.063404083251953, "eval_logps/chosen": -0.0664147213101387, "eval_logps/rejected": -273.9405212402344, "eval_loss": 0.06849328428506851, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3159353733062744, "eval_rewards/margins": 2.6951849460601807, "eval_rewards/rejected": -2.3792498111724854, "eval_runtime": 2.5429, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 4700 }, { "epoch": 0.19, "learning_rate": 4.881918876927342e-06, "logits/chosen": -3.0057754516601562, "logits/rejected": -3.030545711517334, "logps/chosen": -6.398134708404541, "logps/rejected": -276.6763916015625, "loss": 0.1224, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2547248899936676, "rewards/margins": 2.6687448024749756, "rewards/rejected": -2.414019823074341, "step": 4710 }, { "epoch": 0.19, "learning_rate": 4.880856440699582e-06, "logits/chosen": -2.987400770187378, "logits/rejected": -3.0132596492767334, "logps/chosen": -0.18555238842964172, "logps/rejected": -284.5752258300781, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150274455547333, "rewards/margins": 2.8060688972473145, "rewards/rejected": -2.491041421890259, "step": 4720 }, { "epoch": 0.19, "learning_rate": 4.879789362870363e-06, "logits/chosen": -2.9657535552978516, "logits/rejected": -2.9925484657287598, "logps/chosen": -0.15070989727973938, "logps/rejected": -280.6770935058594, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": 0.314454048871994, "rewards/margins": 2.765815019607544, "rewards/rejected": -2.4513611793518066, "step": 4730 }, { "epoch": 0.19, "learning_rate": 4.878717645520008e-06, "logits/chosen": -2.9852190017700195, "logits/rejected": -3.010143518447876, "logps/chosen": -4.698237419128418, "logps/rejected": -279.5423583984375, "loss": 0.0998, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27004730701446533, "rewards/margins": 2.712459087371826, "rewards/rejected": -2.4424118995666504, "step": 4740 }, { "epoch": 0.19, "learning_rate": 4.8776412907378845e-06, "logits/chosen": -2.995004653930664, "logits/rejected": -3.0204148292541504, "logps/chosen": -3.671159267425537, "logps/rejected": -283.57763671875, "loss": 0.0933, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2796114981174469, "rewards/margins": 2.7643332481384277, "rewards/rejected": -2.4847218990325928, "step": 4750 }, { "epoch": 0.19, "learning_rate": 4.8765603006224e-06, "logits/chosen": -2.943943500518799, "logits/rejected": -2.9706056118011475, "logps/chosen": -3.652454376220703, "logps/rejected": -276.9048156738281, "loss": 0.0986, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2784743905067444, "rewards/margins": 2.6926753520965576, "rewards/rejected": -2.414201021194458, "step": 4760 }, { "epoch": 0.19, "learning_rate": 4.875474677281003e-06, "logits/chosen": -2.977837324142456, "logits/rejected": -3.0043294429779053, "logps/chosen": -0.16753363609313965, "logps/rejected": -285.61090087890625, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150641918182373, "rewards/margins": 2.816174030303955, "rewards/rejected": -2.5011098384857178, "step": 4770 }, { "epoch": 0.19, "learning_rate": 4.8743844228301676e-06, "logits/chosen": -2.9988725185394287, "logits/rejected": -3.023986339569092, "logps/chosen": -2.0301413536071777, "logps/rejected": -285.46099853515625, "loss": 0.0751, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30019980669021606, "rewards/margins": 2.792390823364258, "rewards/rejected": -2.4921913146972656, "step": 4780 }, { "epoch": 0.19, "learning_rate": 4.873289539395404e-06, "logits/chosen": -2.9917759895324707, "logits/rejected": -3.0166802406311035, "logps/chosen": -2.317474842071533, "logps/rejected": -283.35784912109375, "loss": 0.0791, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2927587628364563, "rewards/margins": 2.774296283721924, "rewards/rejected": -2.4815375804901123, "step": 4790 }, { "epoch": 0.19, "learning_rate": 4.8721900291112415e-06, "logits/chosen": -3.014835834503174, "logits/rejected": -3.0397868156433105, "logps/chosen": -0.8680275678634644, "logps/rejected": -285.16680908203125, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/chosen": 0.3080899119377136, "rewards/margins": 2.803882122039795, "rewards/rejected": -2.4957923889160156, "step": 4800 }, { "epoch": 0.19, "eval_logits/chosen": -3.04407000541687, "eval_logits/rejected": -3.065844774246216, "eval_logps/chosen": -0.3556212782859802, "eval_logps/rejected": -274.81866455078125, "eval_loss": 0.06790484488010406, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3130432963371277, "eval_rewards/margins": 2.7010741233825684, "eval_rewards/rejected": -2.388031005859375, "eval_runtime": 2.5499, "eval_samples_per_second": 1.961, "eval_steps_per_second": 0.392, "step": 4800 }, { "epoch": 0.19, "learning_rate": 4.871085894121234e-06, "logits/chosen": -3.002750873565674, "logits/rejected": -3.0290141105651855, "logps/chosen": -1.7312772274017334, "logps/rejected": -280.16583251953125, "loss": 0.0699, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29865798354148865, "rewards/margins": 2.743295907974243, "rewards/rejected": -2.4446380138397217, "step": 4810 }, { "epoch": 0.19, "learning_rate": 4.869977136577946e-06, "logits/chosen": -2.993488073348999, "logits/rejected": -3.0200881958007812, "logps/chosen": -2.682328224182129, "logps/rejected": -283.029052734375, "loss": 0.0836, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2927404046058655, "rewards/margins": 2.7652904987335205, "rewards/rejected": -2.4725499153137207, "step": 4820 }, { "epoch": 0.19, "learning_rate": 4.86886375864296e-06, "logits/chosen": -2.9731221199035645, "logits/rejected": -3.0015082359313965, "logps/chosen": -0.21205072104930878, "logps/rejected": -284.33477783203125, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150973618030548, "rewards/margins": 2.8044662475585938, "rewards/rejected": -2.4893689155578613, "step": 4830 }, { "epoch": 0.19, "learning_rate": 4.867745762486862e-06, "logits/chosen": -3.0330519676208496, "logits/rejected": -3.0582799911499023, "logps/chosen": -0.09151200950145721, "logps/rejected": -285.94061279296875, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": 0.31810110807418823, "rewards/margins": 2.8224077224731445, "rewards/rejected": -2.5043065547943115, "step": 4840 }, { "epoch": 0.19, "learning_rate": 4.866623150289241e-06, "logits/chosen": -2.991901397705078, "logits/rejected": -3.0165717601776123, "logps/chosen": -0.5179120302200317, "logps/rejected": -281.9978942871094, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": 0.3092666268348694, "rewards/margins": 2.773923397064209, "rewards/rejected": -2.4646568298339844, "step": 4850 }, { "epoch": 0.19, "learning_rate": 4.86549592423869e-06, "logits/chosen": -2.9918746948242188, "logits/rejected": -3.0160465240478516, "logps/chosen": -1.8998520374298096, "logps/rejected": -277.83349609375, "loss": 0.0816, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30054348707199097, "rewards/margins": 2.721442699432373, "rewards/rejected": -2.4208991527557373, "step": 4860 }, { "epoch": 0.19, "learning_rate": 4.864364086532792e-06, "logits/chosen": -2.979214668273926, "logits/rejected": -3.0035386085510254, "logps/chosen": -2.2428476810455322, "logps/rejected": -279.65313720703125, "loss": 0.078, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29469600319862366, "rewards/margins": 2.7362239360809326, "rewards/rejected": -2.441528081893921, "step": 4870 }, { "epoch": 0.2, "learning_rate": 4.863227639378124e-06, "logits/chosen": -2.967115879058838, "logits/rejected": -2.990504026412964, "logps/chosen": -5.4245405197143555, "logps/rejected": -279.6117858886719, "loss": 0.096, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2630707323551178, "rewards/margins": 2.7051453590393066, "rewards/rejected": -2.442074775695801, "step": 4880 }, { "epoch": 0.2, "learning_rate": 4.862086584990246e-06, "logits/chosen": -3.0168333053588867, "logits/rejected": -3.0386242866516113, "logps/chosen": -2.701259136199951, "logps/rejected": -280.4543762207031, "loss": 0.0843, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28953248262405396, "rewards/margins": 2.7428059577941895, "rewards/rejected": -2.453273296356201, "step": 4890 }, { "epoch": 0.2, "learning_rate": 4.860940925593703e-06, "logits/chosen": -3.008188009262085, "logits/rejected": -3.0259790420532227, "logps/chosen": -5.6571526527404785, "logps/rejected": -280.8555603027344, "loss": 0.1031, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2598158121109009, "rewards/margins": 2.7140612602233887, "rewards/rejected": -2.4542458057403564, "step": 4900 }, { "epoch": 0.2, "eval_logits/chosen": -3.053445339202881, "eval_logits/rejected": -3.0700230598449707, "eval_logps/chosen": -6.206935405731201, "eval_logps/rejected": -275.163330078125, "eval_loss": 0.07480038702487946, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.2545301616191864, "eval_rewards/margins": 2.646008014678955, "eval_rewards/rejected": -2.3914778232574463, "eval_runtime": 2.5453, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.393, "step": 4900 }, { "epoch": 0.2, "learning_rate": 4.8597906634220165e-06, "logits/chosen": -3.0064408779144287, "logits/rejected": -3.0268728733062744, "logps/chosen": -10.893194198608398, "logps/rejected": -271.6018981933594, "loss": 0.1676, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.20762237906455994, "rewards/margins": 2.573477268218994, "rewards/rejected": -2.3658547401428223, "step": 4910 }, { "epoch": 0.2, "learning_rate": 4.8586358007176815e-06, "logits/chosen": -2.9728705883026123, "logits/rejected": -3.0026345252990723, "logps/chosen": -2.9918735027313232, "logps/rejected": -283.22674560546875, "loss": 0.0859, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28382521867752075, "rewards/margins": 2.764925479888916, "rewards/rejected": -2.481100082397461, "step": 4920 }, { "epoch": 0.2, "learning_rate": 4.857476339732162e-06, "logits/chosen": -2.9785854816436768, "logits/rejected": -3.004824161529541, "logps/chosen": -0.10504801571369171, "logps/rejected": -284.8758544921875, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": 0.3161746859550476, "rewards/margins": 2.808763265609741, "rewards/rejected": -2.492588520050049, "step": 4930 }, { "epoch": 0.2, "learning_rate": 4.856312282725886e-06, "logits/chosen": -2.9904375076293945, "logits/rejected": -3.0144543647766113, "logps/chosen": -1.903324842453003, "logps/rejected": -281.33392333984375, "loss": 0.0771, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2979467511177063, "rewards/margins": 2.7567648887634277, "rewards/rejected": -2.458817958831787, "step": 4940 }, { "epoch": 0.2, "learning_rate": 4.855143631968242e-06, "logits/chosen": -3.021012306213379, "logits/rejected": -3.045344591140747, "logps/chosen": -2.6018805503845215, "logps/rejected": -280.78521728515625, "loss": 0.0855, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2920997440814972, "rewards/margins": 2.743265151977539, "rewards/rejected": -2.451164960861206, "step": 4950 }, { "epoch": 0.2, "learning_rate": 4.853970389737576e-06, "logits/chosen": -3.000765085220337, "logits/rejected": -3.02725887298584, "logps/chosen": -3.7380614280700684, "logps/rejected": -282.67205810546875, "loss": 0.094, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2796979546546936, "rewards/margins": 2.753054618835449, "rewards/rejected": -2.4733564853668213, "step": 4960 }, { "epoch": 0.2, "learning_rate": 4.852792558321182e-06, "logits/chosen": -3.026385545730591, "logits/rejected": -3.0487587451934814, "logps/chosen": -2.840367317199707, "logps/rejected": -282.5412902832031, "loss": 0.0859, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2881172299385071, "rewards/margins": 2.7585530281066895, "rewards/rejected": -2.470435857772827, "step": 4970 }, { "epoch": 0.2, "learning_rate": 4.8516101400153036e-06, "logits/chosen": -3.0124380588531494, "logits/rejected": -3.038508653640747, "logps/chosen": -0.16468851268291473, "logps/rejected": -285.6763916015625, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": 0.3163270354270935, "rewards/margins": 2.817246198654175, "rewards/rejected": -2.5009191036224365, "step": 4980 }, { "epoch": 0.2, "learning_rate": 4.850423137125126e-06, "logits/chosen": -3.0090136528015137, "logits/rejected": -3.034686326980591, "logps/chosen": -0.12559811770915985, "logps/rejected": -287.38189697265625, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.317909300327301, "rewards/margins": 2.833775043487549, "rewards/rejected": -2.5158660411834717, "step": 4990 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -2.967095375061035, "logits/rejected": -2.9942245483398438, "logps/chosen": -3.249340772628784, "logps/rejected": -278.35845947265625, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28275883197784424, "rewards/margins": 2.7163405418395996, "rewards/rejected": -2.433581590652466, "step": 5000 }, { "epoch": 0.2, "eval_logits/chosen": -3.046325922012329, "eval_logits/rejected": -3.0696637630462646, "eval_logps/chosen": -0.06915195286273956, "eval_logps/rejected": -275.6590576171875, "eval_loss": 0.06696515530347824, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3159080147743225, "eval_rewards/margins": 2.7123429775238037, "eval_rewards/rejected": -2.396435022354126, "eval_runtime": 2.5346, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 5000 }, { "epoch": 0.2, "learning_rate": 4.848035386857296e-06, "logits/chosen": -2.988198757171631, "logits/rejected": -3.0140597820281982, "logps/chosen": -0.14891251921653748, "logps/rejected": -286.2432556152344, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": 0.3167990446090698, "rewards/margins": 2.819549083709717, "rewards/rejected": -2.5027499198913574, "step": 5010 }, { "epoch": 0.2, "learning_rate": 4.846834644134686e-06, "logits/chosen": -2.991053819656372, "logits/rejected": -3.0181632041931152, "logps/chosen": -5.708492279052734, "logps/rejected": -274.8403015136719, "loss": 0.1213, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2585825026035309, "rewards/margins": 2.6554694175720215, "rewards/rejected": -2.3968870639801025, "step": 5020 }, { "epoch": 0.2, "learning_rate": 4.845629326137849e-06, "logits/chosen": -2.987603187561035, "logits/rejected": -3.0143887996673584, "logps/chosen": -0.33898723125457764, "logps/rejected": -281.35919189453125, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": 0.31379902362823486, "rewards/margins": 2.7733383178710938, "rewards/rejected": -2.4595389366149902, "step": 5030 }, { "epoch": 0.2, "learning_rate": 4.844419435216615e-06, "logits/chosen": -3.0103983879089355, "logits/rejected": -3.0364389419555664, "logps/chosen": -3.731510639190674, "logps/rejected": -282.48077392578125, "loss": 0.0945, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2792286276817322, "rewards/margins": 2.7528929710388184, "rewards/rejected": -2.4736645221710205, "step": 5040 }, { "epoch": 0.2, "learning_rate": 4.84320497372973e-06, "logits/chosen": -3.0130934715270996, "logits/rejected": -3.035919189453125, "logps/chosen": -6.770234107971191, "logps/rejected": -276.83367919921875, "loss": 0.1268, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25048890709877014, "rewards/margins": 2.659921646118164, "rewards/rejected": -2.4094326496124268, "step": 5050 }, { "epoch": 0.2, "learning_rate": 4.841985944044845e-06, "logits/chosen": -3.027770519256592, "logits/rejected": -3.0496394634246826, "logps/chosen": -2.190147876739502, "logps/rejected": -279.42608642578125, "loss": 0.0778, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29465436935424805, "rewards/margins": 2.7372045516967773, "rewards/rejected": -2.4425501823425293, "step": 5060 }, { "epoch": 0.2, "learning_rate": 4.840762348538524e-06, "logits/chosen": -3.023709297180176, "logits/rejected": -3.047478199005127, "logps/chosen": -4.115225315093994, "logps/rejected": -272.94268798828125, "loss": 0.108, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27587980031967163, "rewards/margins": 2.6491522789001465, "rewards/rejected": -2.37327241897583, "step": 5070 }, { "epoch": 0.2, "learning_rate": 4.839534189596228e-06, "logits/chosen": -2.9944915771484375, "logits/rejected": -3.0241336822509766, "logps/chosen": -3.7421340942382812, "logps/rejected": -273.23553466796875, "loss": 0.1087, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2796842157840729, "rewards/margins": 2.655867338180542, "rewards/rejected": -2.376183271408081, "step": 5080 }, { "epoch": 0.2, "learning_rate": 4.838301469612315e-06, "logits/chosen": -2.9738142490386963, "logits/rejected": -2.9968013763427734, "logps/chosen": -3.8305649757385254, "logps/rejected": -281.9336853027344, "loss": 0.0942, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2813973128795624, "rewards/margins": 2.7441117763519287, "rewards/rejected": -2.462714433670044, "step": 5090 }, { "epoch": 0.2, "learning_rate": 4.837064190990036e-06, "logits/chosen": -3.0114753246307373, "logits/rejected": -3.034301996231079, "logps/chosen": -6.5129714012146, "logps/rejected": -275.9564514160156, "loss": 0.1176, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2522929012775421, "rewards/margins": 2.656597137451172, "rewards/rejected": -2.404304027557373, "step": 5100 }, { "epoch": 0.2, "eval_logits/chosen": -3.045559883117676, "eval_logits/rejected": -3.0660154819488525, "eval_logps/chosen": -5.571707248687744, "eval_logps/rejected": -275.8960266113281, "eval_loss": 0.07284247130155563, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.2608824372291565, "eval_rewards/margins": 2.659686803817749, "eval_rewards/rejected": -2.3988044261932373, "eval_runtime": 2.5322, "eval_samples_per_second": 1.975, "eval_steps_per_second": 0.395, "step": 5100 }, { "epoch": 0.2, "learning_rate": 4.8358223561415304e-06, "logits/chosen": -3.0213634967803955, "logits/rejected": -3.045280933380127, "logps/chosen": -3.4640231132507324, "logps/rejected": -283.1900329589844, "loss": 0.0834, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28271740674972534, "rewards/margins": 2.7596843242645264, "rewards/rejected": -2.4769668579101562, "step": 5110 }, { "epoch": 0.2, "learning_rate": 4.834575967487817e-06, "logits/chosen": -3.0242295265197754, "logits/rejected": -3.0460681915283203, "logps/chosen": -21.52669906616211, "logps/rejected": -269.23114013671875, "loss": 0.2292, "rewards/accuracies": 0.9375, "rewards/chosen": 0.10146062076091766, "rewards/margins": 2.4395530223846436, "rewards/rejected": -2.33809232711792, "step": 5120 }, { "epoch": 0.21, "learning_rate": 4.833325027458796e-06, "logits/chosen": -2.9772894382476807, "logits/rejected": -3.005458116531372, "logps/chosen": -5.15264368057251, "logps/rejected": -280.710205078125, "loss": 0.1014, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2641873061656952, "rewards/margins": 2.714550495147705, "rewards/rejected": -2.4503633975982666, "step": 5130 }, { "epoch": 0.21, "learning_rate": 4.832069538493237e-06, "logits/chosen": -2.9894983768463135, "logits/rejected": -3.0160276889801025, "logps/chosen": -6.04154109954834, "logps/rejected": -275.21929931640625, "loss": 0.1167, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25564128160476685, "rewards/margins": 2.6555287837982178, "rewards/rejected": -2.3998875617980957, "step": 5140 }, { "epoch": 0.21, "learning_rate": 4.830809503038781e-06, "logits/chosen": -2.9923324584960938, "logits/rejected": -3.019908905029297, "logps/chosen": -2.5176753997802734, "logps/rejected": -279.10284423828125, "loss": 0.0856, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2899080216884613, "rewards/margins": 2.7306771278381348, "rewards/rejected": -2.4407687187194824, "step": 5150 }, { "epoch": 0.21, "learning_rate": 4.8295449235519314e-06, "logits/chosen": -2.97491455078125, "logits/rejected": -3.0049123764038086, "logps/chosen": -0.27630746364593506, "logps/rejected": -286.36712646484375, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153168559074402, "rewards/margins": 2.823737859725952, "rewards/rejected": -2.508420944213867, "step": 5160 }, { "epoch": 0.21, "learning_rate": 4.828275802498051e-06, "logits/chosen": -3.0073401927948, "logits/rejected": -3.0350141525268555, "logps/chosen": -1.3468650579452515, "logps/rejected": -279.30865478515625, "loss": 0.0734, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.304487407207489, "rewards/margins": 2.7407588958740234, "rewards/rejected": -2.4362711906433105, "step": 5170 }, { "epoch": 0.21, "learning_rate": 4.827002142351356e-06, "logits/chosen": -3.013518810272217, "logits/rejected": -3.041588544845581, "logps/chosen": -0.1792793571949005, "logps/rejected": -283.0902099609375, "loss": 0.0612, "rewards/accuracies": 1.0, "rewards/chosen": 0.31537944078445435, "rewards/margins": 2.7944111824035645, "rewards/rejected": -2.479031562805176, "step": 5180 }, { "epoch": 0.21, "learning_rate": 4.825723945594912e-06, "logits/chosen": -2.978010892868042, "logits/rejected": -3.007244348526001, "logps/chosen": -0.13003824651241302, "logps/rejected": -284.87396240234375, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": 0.31643998622894287, "rewards/margins": 2.809086322784424, "rewards/rejected": -2.4926466941833496, "step": 5190 }, { "epoch": 0.21, "learning_rate": 4.824441214720629e-06, "logits/chosen": -3.01904296875, "logits/rejected": -3.047013998031616, "logps/chosen": -6.677224636077881, "logps/rejected": -279.6515197753906, "loss": 0.1232, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25243738293647766, "rewards/margins": 2.6932272911071777, "rewards/rejected": -2.4407899379730225, "step": 5200 }, { "epoch": 0.21, "eval_logits/chosen": -3.0431811809539795, "eval_logits/rejected": -3.0683486461639404, "eval_logps/chosen": -0.09310842305421829, "eval_logps/rejected": -276.9130554199219, "eval_loss": 0.06589251011610031, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3156684339046478, "eval_rewards/margins": 2.7246437072753906, "eval_rewards/rejected": -2.408975124359131, "eval_runtime": 2.5336, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 5200 }, { "epoch": 0.21, "learning_rate": 4.823153952229257e-06, "logits/chosen": -3.0003466606140137, "logits/rejected": -3.0299229621887207, "logps/chosen": -5.22200870513916, "logps/rejected": -279.8187255859375, "loss": 0.1028, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26476961374282837, "rewards/margins": 2.709141969680786, "rewards/rejected": -2.4443719387054443, "step": 5210 }, { "epoch": 0.21, "learning_rate": 4.821862160630378e-06, "logits/chosen": -2.9920783042907715, "logits/rejected": -3.0211939811706543, "logps/chosen": -0.16880831122398376, "logps/rejected": -281.62799072265625, "loss": 0.0623, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149469494819641, "rewards/margins": 2.772291660308838, "rewards/rejected": -2.4573450088500977, "step": 5220 }, { "epoch": 0.21, "learning_rate": 4.820565842442408e-06, "logits/chosen": -2.9849021434783936, "logits/rejected": -3.0162155628204346, "logps/chosen": -3.824056625366211, "logps/rejected": -281.0265197753906, "loss": 0.0962, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27691560983657837, "rewards/margins": 2.733022689819336, "rewards/rejected": -2.4561069011688232, "step": 5230 }, { "epoch": 0.21, "learning_rate": 4.8192650001925855e-06, "logits/chosen": -3.0123753547668457, "logits/rejected": -3.037736415863037, "logps/chosen": -3.151174783706665, "logps/rejected": -284.35748291015625, "loss": 0.0876, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2879105508327484, "rewards/margins": 2.7755022048950195, "rewards/rejected": -2.487591505050659, "step": 5240 }, { "epoch": 0.21, "learning_rate": 4.817959636416969e-06, "logits/chosen": -3.0098423957824707, "logits/rejected": -3.0360500812530518, "logps/chosen": -2.3893561363220215, "logps/rejected": -283.1678161621094, "loss": 0.0745, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29124701023101807, "rewards/margins": 2.7718939781188965, "rewards/rejected": -2.480647087097168, "step": 5250 }, { "epoch": 0.21, "learning_rate": 4.816649753660431e-06, "logits/chosen": -2.9678845405578613, "logits/rejected": -2.9951272010803223, "logps/chosen": -3.754436492919922, "logps/rejected": -280.5032043457031, "loss": 0.0963, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2785215377807617, "rewards/margins": 2.728949546813965, "rewards/rejected": -2.450428009033203, "step": 5260 }, { "epoch": 0.21, "learning_rate": 4.8153353544766555e-06, "logits/chosen": -2.995993137359619, "logits/rejected": -3.024031162261963, "logps/chosen": -2.113128900527954, "logps/rejected": -288.9522399902344, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": 0.2942522168159485, "rewards/margins": 2.83134126663208, "rewards/rejected": -2.5370888710021973, "step": 5270 }, { "epoch": 0.21, "learning_rate": 4.814016441428131e-06, "logits/chosen": -3.0124223232269287, "logits/rejected": -3.0375776290893555, "logps/chosen": -7.36072301864624, "logps/rejected": -277.7760925292969, "loss": 0.1312, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2450375109910965, "rewards/margins": 2.6679511070251465, "rewards/rejected": -2.4229137897491455, "step": 5280 }, { "epoch": 0.21, "learning_rate": 4.812693017086145e-06, "logits/chosen": -2.9796597957611084, "logits/rejected": -3.005096197128296, "logps/chosen": -6.916863918304443, "logps/rejected": -277.06646728515625, "loss": 0.1265, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24588258564472198, "rewards/margins": 2.6645333766937256, "rewards/rejected": -2.4186506271362305, "step": 5290 }, { "epoch": 0.21, "learning_rate": 4.811365084030784e-06, "logits/chosen": -2.995205879211426, "logits/rejected": -3.0177133083343506, "logps/chosen": -10.850362777709961, "logps/rejected": -273.59564208984375, "loss": 0.1606, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.20662181079387665, "rewards/margins": 2.5925040245056152, "rewards/rejected": -2.3858823776245117, "step": 5300 }, { "epoch": 0.21, "eval_logits/chosen": -3.0496912002563477, "eval_logits/rejected": -3.0747828483581543, "eval_logps/chosen": -0.0840633288025856, "eval_logps/rejected": -277.4427185058594, "eval_loss": 0.06537709385156631, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3157588839530945, "eval_rewards/margins": 2.7300305366516113, "eval_rewards/rejected": -2.414271831512451, "eval_runtime": 2.5375, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 5300 }, { "epoch": 0.21, "learning_rate": 4.810032644850917e-06, "logits/chosen": -2.9977829456329346, "logits/rejected": -3.0210726261138916, "logps/chosen": -10.872428894042969, "logps/rejected": -276.3466796875, "loss": 0.1334, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2093191146850586, "rewards/margins": 2.6149439811706543, "rewards/rejected": -2.4056248664855957, "step": 5310 }, { "epoch": 0.21, "learning_rate": 4.808695702144206e-06, "logits/chosen": -2.9975335597991943, "logits/rejected": -3.024334192276001, "logps/chosen": -2.718376636505127, "logps/rejected": -282.79290771484375, "loss": 0.0844, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.289681077003479, "rewards/margins": 2.761896848678589, "rewards/rejected": -2.4722161293029785, "step": 5320 }, { "epoch": 0.21, "learning_rate": 4.807354258517088e-06, "logits/chosen": -2.9908366203308105, "logits/rejected": -3.0173122882843018, "logps/chosen": -2.141191005706787, "logps/rejected": -284.1576232910156, "loss": 0.07, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2949434816837311, "rewards/margins": 2.7823336124420166, "rewards/rejected": -2.4873905181884766, "step": 5330 }, { "epoch": 0.21, "learning_rate": 4.806008316584776e-06, "logits/chosen": -3.0210137367248535, "logits/rejected": -3.0474653244018555, "logps/chosen": -0.11841583251953125, "logps/rejected": -286.0948486328125, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": 0.31964972615242004, "rewards/margins": 2.8220717906951904, "rewards/rejected": -2.5024220943450928, "step": 5340 }, { "epoch": 0.21, "learning_rate": 4.804657878971252e-06, "logits/chosen": -3.001260995864868, "logits/rejected": -3.024810314178467, "logps/chosen": -7.871110439300537, "logps/rejected": -277.09051513671875, "loss": 0.1101, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23968505859375, "rewards/margins": 2.653125047683716, "rewards/rejected": -2.413440227508545, "step": 5350 }, { "epoch": 0.21, "learning_rate": 4.803302948309264e-06, "logits/chosen": -3.010918140411377, "logits/rejected": -3.036341905593872, "logps/chosen": -2.7920916080474854, "logps/rejected": -283.22216796875, "loss": 0.0846, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2862365245819092, "rewards/margins": 2.76819109916687, "rewards/rejected": -2.481954574584961, "step": 5360 }, { "epoch": 0.21, "learning_rate": 4.801943527240318e-06, "logits/chosen": -3.004141092300415, "logits/rejected": -3.0291011333465576, "logps/chosen": -0.1728305071592331, "logps/rejected": -284.32843017578125, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": 0.3130221366882324, "rewards/margins": 2.8068413734436035, "rewards/rejected": -2.493818998336792, "step": 5370 }, { "epoch": 0.22, "learning_rate": 4.800579618414677e-06, "logits/chosen": -2.994201183319092, "logits/rejected": -3.017364978790283, "logps/chosen": -3.3542914390563965, "logps/rejected": -283.75994873046875, "loss": 0.0862, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2826102077960968, "rewards/margins": 2.7639987468719482, "rewards/rejected": -2.481388568878174, "step": 5380 }, { "epoch": 0.22, "learning_rate": 4.799211224491348e-06, "logits/chosen": -3.011475086212158, "logits/rejected": -3.0344512462615967, "logps/chosen": -1.3095567226409912, "logps/rejected": -286.20928955078125, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": 0.3053375780582428, "rewards/margins": 2.8138983249664307, "rewards/rejected": -2.5085608959198, "step": 5390 }, { "epoch": 0.22, "learning_rate": 4.7978383481380865e-06, "logits/chosen": -2.9939093589782715, "logits/rejected": -3.022151470184326, "logps/chosen": -0.5725975036621094, "logps/rejected": -284.52099609375, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": 0.30863556265830994, "rewards/margins": 2.802967071533203, "rewards/rejected": -2.4943315982818604, "step": 5400 }, { "epoch": 0.22, "eval_logits/chosen": -3.0504937171936035, "eval_logits/rejected": -3.076338291168213, "eval_logps/chosen": -0.07069720327854156, "eval_logps/rejected": -277.43603515625, "eval_loss": 0.065459705889225, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3158925473690033, "eval_rewards/margins": 2.7300972938537598, "eval_rewards/rejected": -2.4142050743103027, "eval_runtime": 2.5389, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 5400 }, { "epoch": 0.22, "learning_rate": 4.796460992031386e-06, "logits/chosen": -2.9822239875793457, "logits/rejected": -3.011122226715088, "logps/chosen": -0.12708790600299835, "logps/rejected": -287.0798645019531, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": 0.3168625235557556, "rewards/margins": 2.8285791873931885, "rewards/rejected": -2.511716604232788, "step": 5410 }, { "epoch": 0.22, "learning_rate": 4.795079158856471e-06, "logits/chosen": -2.9922337532043457, "logits/rejected": -3.019601345062256, "logps/chosen": -3.6230626106262207, "logps/rejected": -283.01165771484375, "loss": 0.0934, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2779354155063629, "rewards/margins": 2.756148338317871, "rewards/rejected": -2.478212833404541, "step": 5420 }, { "epoch": 0.22, "learning_rate": 4.793692851307297e-06, "logits/chosen": -3.002472400665283, "logits/rejected": -3.03022837638855, "logps/chosen": -3.616180896759033, "logps/rejected": -283.5354919433594, "loss": 0.0934, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2793738842010498, "rewards/margins": 2.7615578174591064, "rewards/rejected": -2.4821836948394775, "step": 5430 }, { "epoch": 0.22, "learning_rate": 4.792302072086542e-06, "logits/chosen": -2.989656448364258, "logits/rejected": -3.020087480545044, "logps/chosen": -0.12035367637872696, "logps/rejected": -288.4855651855469, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.31498414278030396, "rewards/margins": 2.845165252685547, "rewards/rejected": -2.5301809310913086, "step": 5440 }, { "epoch": 0.22, "learning_rate": 4.790906823905599e-06, "logits/chosen": -2.983506441116333, "logits/rejected": -3.012816905975342, "logps/chosen": -2.434417724609375, "logps/rejected": -285.08282470703125, "loss": 0.0671, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2927570641040802, "rewards/margins": 2.787292957305908, "rewards/rejected": -2.4945359230041504, "step": 5450 }, { "epoch": 0.22, "learning_rate": 4.789507109484579e-06, "logits/chosen": -2.9901227951049805, "logits/rejected": -3.0189948081970215, "logps/chosen": -3.6610610485076904, "logps/rejected": -279.69207763671875, "loss": 0.0966, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2794961929321289, "rewards/margins": 2.718701124191284, "rewards/rejected": -2.4392049312591553, "step": 5460 }, { "epoch": 0.22, "learning_rate": 4.788102931552294e-06, "logits/chosen": -3.003236770629883, "logits/rejected": -3.0293753147125244, "logps/chosen": -7.479198455810547, "logps/rejected": -276.07122802734375, "loss": 0.1325, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24060240387916565, "rewards/margins": 2.648540496826172, "rewards/rejected": -2.4079384803771973, "step": 5470 }, { "epoch": 0.22, "learning_rate": 4.7866942928462625e-06, "logits/chosen": -3.013003349304199, "logits/rejected": -3.038752555847168, "logps/chosen": -9.585858345031738, "logps/rejected": -272.69610595703125, "loss": 0.1552, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22354379296302795, "rewards/margins": 2.598958969116211, "rewards/rejected": -2.3754146099090576, "step": 5480 }, { "epoch": 0.22, "learning_rate": 4.7852811961126974e-06, "logits/chosen": -2.979689359664917, "logits/rejected": -3.006019353866577, "logps/chosen": -3.443286418914795, "logps/rejected": -283.8199157714844, "loss": 0.0863, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2843311131000519, "rewards/margins": 2.7654991149902344, "rewards/rejected": -2.481168270111084, "step": 5490 }, { "epoch": 0.22, "learning_rate": 4.783863644106502e-06, "logits/chosen": -3.0008890628814697, "logits/rejected": -3.028687000274658, "logps/chosen": -3.7000396251678467, "logps/rejected": -279.0063781738281, "loss": 0.0966, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2782553732395172, "rewards/margins": 2.7144827842712402, "rewards/rejected": -2.436227798461914, "step": 5500 }, { "epoch": 0.22, "eval_logits/chosen": -3.0537102222442627, "eval_logits/rejected": -3.0782530307769775, "eval_logps/chosen": -0.08424742519855499, "eval_logps/rejected": -277.52099609375, "eval_loss": 0.06527624279260635, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31575706601142883, "eval_rewards/margins": 2.730811595916748, "eval_rewards/rejected": -2.4150543212890625, "eval_runtime": 2.5425, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 5500 }, { "epoch": 0.22, "learning_rate": 4.782441639591269e-06, "logits/chosen": -2.977029323577881, "logits/rejected": -3.0045688152313232, "logps/chosen": -2.9239728450775146, "logps/rejected": -281.86663818359375, "loss": 0.0866, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2886525094509125, "rewards/margins": 2.7537224292755127, "rewards/rejected": -2.465069532394409, "step": 5510 }, { "epoch": 0.22, "learning_rate": 4.781015185339266e-06, "logits/chosen": -3.0239710807800293, "logits/rejected": -3.0481863021850586, "logps/chosen": -0.14096742868423462, "logps/rejected": -287.78826904296875, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.31710124015808105, "rewards/margins": 2.8413474559783936, "rewards/rejected": -2.5242464542388916, "step": 5520 }, { "epoch": 0.22, "learning_rate": 4.77958428413144e-06, "logits/chosen": -2.9753341674804688, "logits/rejected": -3.0044076442718506, "logps/chosen": -0.16268211603164673, "logps/rejected": -288.9393005371094, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.31202051043510437, "rewards/margins": 2.8532347679138184, "rewards/rejected": -2.5412139892578125, "step": 5530 }, { "epoch": 0.22, "learning_rate": 4.778148938757406e-06, "logits/chosen": -3.001692771911621, "logits/rejected": -3.0269522666931152, "logps/chosen": -0.27422869205474854, "logps/rejected": -282.5021667480469, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": 0.31451302766799927, "rewards/margins": 2.782334804534912, "rewards/rejected": -2.4678215980529785, "step": 5540 }, { "epoch": 0.22, "learning_rate": 4.776709152015443e-06, "logits/chosen": -3.0168192386627197, "logits/rejected": -3.0443758964538574, "logps/chosen": -5.663025856018066, "logps/rejected": -282.13525390625, "loss": 0.1117, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2619752585887909, "rewards/margins": 2.7279717922210693, "rewards/rejected": -2.465996503829956, "step": 5550 }, { "epoch": 0.22, "learning_rate": 4.775264926712489e-06, "logits/chosen": -2.9867618083953857, "logits/rejected": -3.0156524181365967, "logps/chosen": -2.502401828765869, "logps/rejected": -282.93035888671875, "loss": 0.0818, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29016605019569397, "rewards/margins": 2.76554799079895, "rewards/rejected": -2.475381851196289, "step": 5560 }, { "epoch": 0.22, "learning_rate": 4.7738162656641365e-06, "logits/chosen": -3.014021635055542, "logits/rejected": -3.0408735275268555, "logps/chosen": -0.1814623922109604, "logps/rejected": -285.5419006347656, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": 0.3159232437610626, "rewards/margins": 2.815229892730713, "rewards/rejected": -2.499307155609131, "step": 5570 }, { "epoch": 0.22, "learning_rate": 4.772363171694623e-06, "logits/chosen": -3.0144991874694824, "logits/rejected": -3.040661096572876, "logps/chosen": -3.902198076248169, "logps/rejected": -285.73394775390625, "loss": 0.0919, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2797431945800781, "rewards/margins": 2.782278299331665, "rewards/rejected": -2.502535343170166, "step": 5580 }, { "epoch": 0.22, "learning_rate": 4.770905647636828e-06, "logits/chosen": -2.995851516723633, "logits/rejected": -3.0218727588653564, "logps/chosen": -6.5278730392456055, "logps/rejected": -278.62689208984375, "loss": 0.1245, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2521207928657532, "rewards/margins": 2.682407855987549, "rewards/rejected": -2.4302871227264404, "step": 5590 }, { "epoch": 0.22, "learning_rate": 4.769443696332272e-06, "logits/chosen": -3.01865816116333, "logits/rejected": -3.04410982131958, "logps/chosen": -0.220844104886055, "logps/rejected": -284.3800964355469, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/chosen": 0.3165411353111267, "rewards/margins": 2.8015096187591553, "rewards/rejected": -2.484968423843384, "step": 5600 }, { "epoch": 0.22, "eval_logits/chosen": -3.0548884868621826, "eval_logits/rejected": -3.075730323791504, "eval_logps/chosen": -0.06839067488908768, "eval_logps/rejected": -276.83538818359375, "eval_loss": 0.06618759781122208, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31591561436653137, "eval_rewards/margins": 2.724113941192627, "eval_rewards/rejected": -2.408198356628418, "eval_runtime": 2.5381, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 5600 }, { "epoch": 0.22, "learning_rate": 4.767977320631103e-06, "logits/chosen": -2.9947867393493652, "logits/rejected": -3.01865291595459, "logps/chosen": -7.23397970199585, "logps/rejected": -281.4549560546875, "loss": 0.1279, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24220876395702362, "rewards/margins": 2.7047393321990967, "rewards/rejected": -2.4625306129455566, "step": 5610 }, { "epoch": 0.22, "learning_rate": 4.766506523392095e-06, "logits/chosen": -2.9988255500793457, "logits/rejected": -3.021033525466919, "logps/chosen": -5.993952751159668, "logps/rejected": -281.3063049316406, "loss": 0.1142, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25700733065605164, "rewards/margins": 2.718773365020752, "rewards/rejected": -2.461765766143799, "step": 5620 }, { "epoch": 0.23, "learning_rate": 4.765031307482643e-06, "logits/chosen": -2.9897289276123047, "logits/rejected": -3.015772819519043, "logps/chosen": -0.19449779391288757, "logps/rejected": -285.6607971191406, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": 0.3126969039440155, "rewards/margins": 2.8199539184570312, "rewards/rejected": -2.5072569847106934, "step": 5630 }, { "epoch": 0.23, "learning_rate": 4.763551675778755e-06, "logits/chosen": -2.995809555053711, "logits/rejected": -3.019446849822998, "logps/chosen": -10.198819160461426, "logps/rejected": -277.8360900878906, "loss": 0.1576, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21547874808311462, "rewards/margins": 2.640719175338745, "rewards/rejected": -2.4252407550811768, "step": 5640 }, { "epoch": 0.23, "learning_rate": 4.762067631165049e-06, "logits/chosen": -2.9979541301727295, "logits/rejected": -3.017665147781372, "logps/chosen": -2.92610764503479, "logps/rejected": -282.95208740234375, "loss": 0.0842, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2873653471469879, "rewards/margins": 2.76316237449646, "rewards/rejected": -2.475796937942505, "step": 5650 }, { "epoch": 0.23, "learning_rate": 4.760579176534747e-06, "logits/chosen": -3.0402538776397705, "logits/rejected": -3.0603628158569336, "logps/chosen": -2.7408053874969482, "logps/rejected": -283.5105285644531, "loss": 0.0728, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2895038425922394, "rewards/margins": 2.7722067832946777, "rewards/rejected": -2.482703447341919, "step": 5660 }, { "epoch": 0.23, "learning_rate": 4.759086314789667e-06, "logits/chosen": -3.0003163814544678, "logits/rejected": -3.0235044956207275, "logps/chosen": -6.6892523765563965, "logps/rejected": -277.8475646972656, "loss": 0.1234, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2510222792625427, "rewards/margins": 2.6727445125579834, "rewards/rejected": -2.421722412109375, "step": 5670 }, { "epoch": 0.23, "learning_rate": 4.757589048840219e-06, "logits/chosen": -2.9842097759246826, "logits/rejected": -3.0081372261047363, "logps/chosen": -2.750180959701538, "logps/rejected": -282.5481262207031, "loss": 0.0852, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2869001626968384, "rewards/margins": 2.7605679035186768, "rewards/rejected": -2.473667860031128, "step": 5680 }, { "epoch": 0.23, "learning_rate": 4.756087381605399e-06, "logits/chosen": -3.0327491760253906, "logits/rejected": -3.0583977699279785, "logps/chosen": -0.23625019192695618, "logps/rejected": -290.95404052734375, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.31491363048553467, "rewards/margins": 2.873502254486084, "rewards/rejected": -2.5585885047912598, "step": 5690 }, { "epoch": 0.23, "learning_rate": 4.754581316012785e-06, "logits/chosen": -3.015150785446167, "logits/rejected": -3.0417256355285645, "logps/chosen": -0.16002210974693298, "logps/rejected": -286.6380920410156, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": 0.31780534982681274, "rewards/margins": 2.8275203704833984, "rewards/rejected": -2.5097153186798096, "step": 5700 }, { "epoch": 0.23, "eval_logits/chosen": -3.054204225540161, "eval_logits/rejected": -3.0779027938842773, "eval_logps/chosen": -0.09215555340051651, "eval_logps/rejected": -277.0788879394531, "eval_loss": 0.06608527153730392, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3156779706478119, "eval_rewards/margins": 2.726311206817627, "eval_rewards/rejected": -2.4106333255767822, "eval_runtime": 2.5383, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 5700 }, { "epoch": 0.23, "learning_rate": 4.753070854998529e-06, "logits/chosen": -3.0177407264709473, "logits/rejected": -3.0421741008758545, "logps/chosen": -3.7082724571228027, "logps/rejected": -282.53125, "loss": 0.0939, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2818816304206848, "rewards/margins": 2.753798484802246, "rewards/rejected": -2.471916913986206, "step": 5710 }, { "epoch": 0.23, "learning_rate": 4.751556001507351e-06, "logits/chosen": -3.009211778640747, "logits/rejected": -3.0335564613342285, "logps/chosen": -4.161299228668213, "logps/rejected": -281.5154724121094, "loss": 0.1015, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2745741903781891, "rewards/margins": 2.7339348793029785, "rewards/rejected": -2.4593605995178223, "step": 5720 }, { "epoch": 0.23, "learning_rate": 4.750036758492537e-06, "logits/chosen": -3.0196495056152344, "logits/rejected": -3.0426273345947266, "logps/chosen": -7.405558109283447, "logps/rejected": -277.1383361816406, "loss": 0.1317, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24357923865318298, "rewards/margins": 2.6604506969451904, "rewards/rejected": -2.4168715476989746, "step": 5730 }, { "epoch": 0.23, "learning_rate": 4.748513128915928e-06, "logits/chosen": -2.9975619316101074, "logits/rejected": -3.0180888175964355, "logps/chosen": -5.902446269989014, "logps/rejected": -281.98077392578125, "loss": 0.1008, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.259005069732666, "rewards/margins": 2.722454786300659, "rewards/rejected": -2.4634499549865723, "step": 5740 }, { "epoch": 0.23, "learning_rate": 4.746985115747918e-06, "logits/chosen": -2.983950138092041, "logits/rejected": -3.0078461170196533, "logps/chosen": -5.859738349914551, "logps/rejected": -282.2395935058594, "loss": 0.0999, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2552436292171478, "rewards/margins": 2.7292685508728027, "rewards/rejected": -2.474025011062622, "step": 5750 }, { "epoch": 0.23, "learning_rate": 4.745452721967446e-06, "logits/chosen": -3.0252652168273926, "logits/rejected": -3.0471701622009277, "logps/chosen": -2.5821545124053955, "logps/rejected": -282.8995361328125, "loss": 0.0713, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29237794876098633, "rewards/margins": 2.7651519775390625, "rewards/rejected": -2.472774028778076, "step": 5760 }, { "epoch": 0.23, "learning_rate": 4.7439159505619946e-06, "logits/chosen": -3.02209210395813, "logits/rejected": -3.0443127155303955, "logps/chosen": -3.652449131011963, "logps/rejected": -285.6189270019531, "loss": 0.065, "rewards/accuracies": 1.0, "rewards/chosen": 0.2804614305496216, "rewards/margins": 2.7848641872406006, "rewards/rejected": -2.5044023990631104, "step": 5770 }, { "epoch": 0.23, "learning_rate": 4.7423748045275755e-06, "logits/chosen": -3.016594648361206, "logits/rejected": -3.0456576347351074, "logps/chosen": -2.151545763015747, "logps/rejected": -286.63739013671875, "loss": 0.0633, "rewards/accuracies": 1.0, "rewards/chosen": 0.29564639925956726, "rewards/margins": 2.8070411682128906, "rewards/rejected": -2.51139497756958, "step": 5780 }, { "epoch": 0.23, "learning_rate": 4.740829286868732e-06, "logits/chosen": -3.005688190460205, "logits/rejected": -3.0330302715301514, "logps/chosen": -0.1725854128599167, "logps/rejected": -286.83099365234375, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153653144836426, "rewards/margins": 2.8276920318603516, "rewards/rejected": -2.512326955795288, "step": 5790 }, { "epoch": 0.23, "learning_rate": 4.7392794005985324e-06, "logits/chosen": -2.9846508502960205, "logits/rejected": -3.01656174659729, "logps/chosen": -2.295790910720825, "logps/rejected": -283.23614501953125, "loss": 0.0721, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29361528158187866, "rewards/margins": 2.770256996154785, "rewards/rejected": -2.4766411781311035, "step": 5800 }, { "epoch": 0.23, "eval_logits/chosen": -3.052077054977417, "eval_logits/rejected": -3.0766377449035645, "eval_logps/chosen": -0.0944148376584053, "eval_logps/rejected": -278.2587585449219, "eval_loss": 0.06480594724416733, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.315655380487442, "eval_rewards/margins": 2.7380871772766113, "eval_rewards/rejected": -2.4224319458007812, "eval_runtime": 2.538, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 5800 }, { "epoch": 0.23, "learning_rate": 4.7377251487385565e-06, "logits/chosen": -3.0007691383361816, "logits/rejected": -3.0306930541992188, "logps/chosen": -0.1628924310207367, "logps/rejected": -283.7952575683594, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/chosen": 0.31857210397720337, "rewards/margins": 2.796290159225464, "rewards/rejected": -2.477717876434326, "step": 5810 }, { "epoch": 0.23, "learning_rate": 4.7361665343189e-06, "logits/chosen": -2.992703914642334, "logits/rejected": -3.021397829055786, "logps/chosen": -0.1766202747821808, "logps/rejected": -288.2649230957031, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.31533268094062805, "rewards/margins": 2.8419222831726074, "rewards/rejected": -2.5265896320343018, "step": 5820 }, { "epoch": 0.23, "learning_rate": 4.73460356037816e-06, "logits/chosen": -3.000839948654175, "logits/rejected": -3.028808116912842, "logps/chosen": -7.776961326599121, "logps/rejected": -279.50323486328125, "loss": 0.1255, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23939020931720734, "rewards/margins": 2.6829397678375244, "rewards/rejected": -2.443549394607544, "step": 5830 }, { "epoch": 0.23, "learning_rate": 4.733036229963435e-06, "logits/chosen": -2.9929916858673096, "logits/rejected": -3.020524263381958, "logps/chosen": -3.408146381378174, "logps/rejected": -280.2055358886719, "loss": 0.0899, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28185638785362244, "rewards/margins": 2.7300562858581543, "rewards/rejected": -2.44819974899292, "step": 5840 }, { "epoch": 0.23, "learning_rate": 4.731464546130315e-06, "logits/chosen": -3.0052735805511475, "logits/rejected": -3.0338711738586426, "logps/chosen": -0.6699663400650024, "logps/rejected": -283.9396057128906, "loss": 0.0651, "rewards/accuracies": 1.0, "rewards/chosen": 0.3101986050605774, "rewards/margins": 2.794015407562256, "rewards/rejected": -2.4838168621063232, "step": 5850 }, { "epoch": 0.23, "learning_rate": 4.729888511942877e-06, "logits/chosen": -3.013779640197754, "logits/rejected": -3.0377748012542725, "logps/chosen": -7.22261905670166, "logps/rejected": -278.0504455566406, "loss": 0.1296, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24502527713775635, "rewards/margins": 2.6725757122039795, "rewards/rejected": -2.427550792694092, "step": 5860 }, { "epoch": 0.23, "learning_rate": 4.7283081304736834e-06, "logits/chosen": -3.019649028778076, "logits/rejected": -3.0437254905700684, "logps/chosen": -3.238417387008667, "logps/rejected": -280.5517883300781, "loss": 0.0781, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2880454659461975, "rewards/margins": 2.733168601989746, "rewards/rejected": -2.4451231956481934, "step": 5870 }, { "epoch": 0.24, "learning_rate": 4.726723404803767e-06, "logits/chosen": -3.028444766998291, "logits/rejected": -3.0556631088256836, "logps/chosen": -4.7360100746154785, "logps/rejected": -280.0032653808594, "loss": 0.1053, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2677459716796875, "rewards/margins": 2.7167441844940186, "rewards/rejected": -2.448997974395752, "step": 5880 }, { "epoch": 0.24, "learning_rate": 4.725134338022631e-06, "logits/chosen": -3.0218803882598877, "logits/rejected": -3.048365592956543, "logps/chosen": -0.1717347800731659, "logps/rejected": -286.7164306640625, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": 0.3137911558151245, "rewards/margins": 2.828274726867676, "rewards/rejected": -2.514483690261841, "step": 5890 }, { "epoch": 0.24, "learning_rate": 4.723540933228245e-06, "logits/chosen": -3.026310443878174, "logits/rejected": -3.0544044971466064, "logps/chosen": -3.8821182250976562, "logps/rejected": -284.3301696777344, "loss": 0.0935, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27735868096351624, "rewards/margins": 2.768646001815796, "rewards/rejected": -2.4912872314453125, "step": 5900 }, { "epoch": 0.24, "eval_logits/chosen": -3.055402994155884, "eval_logits/rejected": -3.0793588161468506, "eval_logps/chosen": -0.07770083844661713, "eval_logps/rejected": -278.1962890625, "eval_loss": 0.06493928283452988, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3158225417137146, "eval_rewards/margins": 2.7376296520233154, "eval_rewards/rejected": -2.421807050704956, "eval_runtime": 2.5354, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 5900 }, { "epoch": 0.24, "learning_rate": 4.721943193527029e-06, "logits/chosen": -3.032132387161255, "logits/rejected": -3.0618205070495605, "logps/chosen": -3.555936813354492, "logps/rejected": -284.54296875, "loss": 0.0915, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27828529477119446, "rewards/margins": 2.7734131813049316, "rewards/rejected": -2.4951281547546387, "step": 5910 }, { "epoch": 0.24, "learning_rate": 4.720341122033862e-06, "logits/chosen": -2.997732639312744, "logits/rejected": -3.0262839794158936, "logps/chosen": -5.108921527862549, "logps/rejected": -277.88201904296875, "loss": 0.1093, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2660923898220062, "rewards/margins": 2.6901395320892334, "rewards/rejected": -2.4240472316741943, "step": 5920 }, { "epoch": 0.24, "learning_rate": 4.718734721872062e-06, "logits/chosen": -3.0255167484283447, "logits/rejected": -3.052149772644043, "logps/chosen": -0.14531289041042328, "logps/rejected": -287.03369140625, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": 0.31468045711517334, "rewards/margins": 2.8317368030548096, "rewards/rejected": -2.5170559883117676, "step": 5930 }, { "epoch": 0.24, "learning_rate": 4.71712399617339e-06, "logits/chosen": -3.037341356277466, "logits/rejected": -3.061558246612549, "logps/chosen": -3.8090527057647705, "logps/rejected": -285.59625244140625, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28206878900527954, "rewards/margins": 2.7816407680511475, "rewards/rejected": -2.4995715618133545, "step": 5940 }, { "epoch": 0.24, "learning_rate": 4.715508948078037e-06, "logits/chosen": -3.0016002655029297, "logits/rejected": -3.0300345420837402, "logps/chosen": -0.14449608325958252, "logps/rejected": -287.52838134765625, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": 0.31418466567993164, "rewards/margins": 2.835129737854004, "rewards/rejected": -2.5209450721740723, "step": 5950 }, { "epoch": 0.24, "learning_rate": 4.713889580734623e-06, "logits/chosen": -3.03023099899292, "logits/rejected": -3.057603120803833, "logps/chosen": -0.18711015582084656, "logps/rejected": -288.14300537109375, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.3184083104133606, "rewards/margins": 2.8390707969665527, "rewards/rejected": -2.520662307739258, "step": 5960 }, { "epoch": 0.24, "learning_rate": 4.712265897300186e-06, "logits/chosen": -2.9965946674346924, "logits/rejected": -3.025155782699585, "logps/chosen": -7.685487270355225, "logps/rejected": -279.0698547363281, "loss": 0.1315, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23923833668231964, "rewards/margins": 2.678440809249878, "rewards/rejected": -2.4392027854919434, "step": 5970 }, { "epoch": 0.24, "learning_rate": 4.710637900940181e-06, "logits/chosen": -2.9857897758483887, "logits/rejected": -3.0164971351623535, "logps/chosen": -3.7033169269561768, "logps/rejected": -280.7667236328125, "loss": 0.0951, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27929311990737915, "rewards/margins": 2.734968662261963, "rewards/rejected": -2.4556751251220703, "step": 5980 }, { "epoch": 0.24, "learning_rate": 4.709005594828471e-06, "logits/chosen": -3.034104824066162, "logits/rejected": -3.0601108074188232, "logps/chosen": -3.4710681438446045, "logps/rejected": -284.91387939453125, "loss": 0.0908, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2811073958873749, "rewards/margins": 2.778981924057007, "rewards/rejected": -2.4978744983673096, "step": 5990 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -3.0436110496520996, "logits/rejected": -3.0669798851013184, "logps/chosen": -10.156457901000977, "logps/rejected": -278.8134765625, "loss": 0.1559, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21831190586090088, "rewards/margins": 2.649937152862549, "rewards/rejected": -2.4316253662109375, "step": 6000 }, { "epoch": 0.24, "eval_logits/chosen": -3.056652545928955, "eval_logits/rejected": -3.077849864959717, "eval_logps/chosen": -0.0829881876707077, "eval_logps/rejected": -278.44561767578125, "eval_loss": 0.06478901952505112, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31576964259147644, "eval_rewards/margins": 2.740070343017578, "eval_rewards/rejected": -2.4243006706237793, "eval_runtime": 2.5366, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 6000 }, { "epoch": 0.24, "learning_rate": 4.705728066087384e-06, "logits/chosen": -3.020084857940674, "logits/rejected": -3.044428825378418, "logps/chosen": -3.7693939208984375, "logps/rejected": -280.59649658203125, "loss": 0.0964, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2798910439014435, "rewards/margins": 2.7301220893859863, "rewards/rejected": -2.4502310752868652, "step": 6010 }, { "epoch": 0.24, "learning_rate": 4.704082849847718e-06, "logits/chosen": -3.0160863399505615, "logits/rejected": -3.0433430671691895, "logps/chosen": -5.810656547546387, "logps/rejected": -280.79876708984375, "loss": 0.1151, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.259061336517334, "rewards/margins": 2.7125041484832764, "rewards/rejected": -2.4534425735473633, "step": 6020 }, { "epoch": 0.24, "learning_rate": 4.702433336635753e-06, "logits/chosen": -2.995929718017578, "logits/rejected": -3.0221354961395264, "logps/chosen": -3.7924628257751465, "logps/rejected": -283.5187683105469, "loss": 0.0942, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2805129289627075, "rewards/margins": 2.7610862255096436, "rewards/rejected": -2.4805736541748047, "step": 6030 }, { "epoch": 0.24, "learning_rate": 4.700779529667301e-06, "logits/chosen": -3.0163204669952393, "logits/rejected": -3.0446319580078125, "logps/chosen": -3.443824291229248, "logps/rejected": -286.1566467285156, "loss": 0.089, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.282692015171051, "rewards/margins": 2.792403221130371, "rewards/rejected": -2.509711265563965, "step": 6040 }, { "epoch": 0.24, "learning_rate": 4.699121432166542e-06, "logits/chosen": -2.990424633026123, "logits/rejected": -3.0180656909942627, "logps/chosen": -0.4434276521205902, "logps/rejected": -286.5597229003906, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": 0.3124910295009613, "rewards/margins": 2.8248448371887207, "rewards/rejected": -2.5123536586761475, "step": 6050 }, { "epoch": 0.24, "learning_rate": 4.697459047366022e-06, "logits/chosen": -3.018059253692627, "logits/rejected": -3.045051097869873, "logps/chosen": -3.3044090270996094, "logps/rejected": -284.13519287109375, "loss": 0.0893, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28333574533462524, "rewards/margins": 2.770473003387451, "rewards/rejected": -2.4871373176574707, "step": 6060 }, { "epoch": 0.24, "learning_rate": 4.695792378506645e-06, "logits/chosen": -3.012720823287964, "logits/rejected": -3.037449836730957, "logps/chosen": -3.8617279529571533, "logps/rejected": -284.21160888671875, "loss": 0.0749, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2773016691207886, "rewards/margins": 2.768852472305298, "rewards/rejected": -2.491550922393799, "step": 6070 }, { "epoch": 0.24, "learning_rate": 4.694121428837668e-06, "logits/chosen": -3.0047249794006348, "logits/rejected": -3.0309042930603027, "logps/chosen": -0.41448482871055603, "logps/rejected": -284.9490661621094, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": 0.3128068149089813, "rewards/margins": 2.8086533546447754, "rewards/rejected": -2.4958462715148926, "step": 6080 }, { "epoch": 0.24, "learning_rate": 4.692446201616692e-06, "logits/chosen": -2.9705543518066406, "logits/rejected": -2.9980621337890625, "logps/chosen": -10.626190185546875, "logps/rejected": -278.08074951171875, "loss": 0.1609, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21026208996772766, "rewards/margins": 2.6362569332122803, "rewards/rejected": -2.425995111465454, "step": 6090 }, { "epoch": 0.24, "learning_rate": 4.690766700109659e-06, "logits/chosen": -3.0113320350646973, "logits/rejected": -3.0388405323028564, "logps/chosen": -3.6730141639709473, "logps/rejected": -278.26214599609375, "loss": 0.0973, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2783082127571106, "rewards/margins": 2.707520008087158, "rewards/rejected": -2.4292120933532715, "step": 6100 }, { "epoch": 0.24, "eval_logits/chosen": -3.0613906383514404, "eval_logits/rejected": -3.087005853652954, "eval_logps/chosen": -0.06658507883548737, "eval_logps/rejected": -278.2578125, "eval_loss": 0.06452878564596176, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3159337043762207, "eval_rewards/margins": 2.738356113433838, "eval_rewards/rejected": -2.422422170639038, "eval_runtime": 2.5442, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.393, "step": 6100 }, { "epoch": 0.24, "learning_rate": 4.689082927590844e-06, "logits/chosen": -2.988966226577759, "logits/rejected": -3.0168092250823975, "logps/chosen": -2.299539089202881, "logps/rejected": -282.9349365234375, "loss": 0.0809, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.295277863740921, "rewards/margins": 2.7683753967285156, "rewards/rejected": -2.473097801208496, "step": 6110 }, { "epoch": 0.24, "learning_rate": 4.687394887342845e-06, "logits/chosen": -2.975510835647583, "logits/rejected": -3.0064749717712402, "logps/chosen": -2.164003372192383, "logps/rejected": -284.64422607421875, "loss": 0.0783, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29419785737991333, "rewards/margins": 2.785858392715454, "rewards/rejected": -2.4916601181030273, "step": 6120 }, { "epoch": 0.25, "learning_rate": 4.6857025826565845e-06, "logits/chosen": -3.0071375370025635, "logits/rejected": -3.0357067584991455, "logps/chosen": -0.6709804534912109, "logps/rejected": -281.2978820800781, "loss": 0.0672, "rewards/accuracies": 1.0, "rewards/chosen": 0.30905449390411377, "rewards/margins": 2.7693991661071777, "rewards/rejected": -2.4603450298309326, "step": 6130 }, { "epoch": 0.25, "learning_rate": 4.684006016831297e-06, "logits/chosen": -3.0183777809143066, "logits/rejected": -3.0391480922698975, "logps/chosen": -5.210557460784912, "logps/rejected": -284.9086608886719, "loss": 0.072, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26538166403770447, "rewards/margins": 2.761587381362915, "rewards/rejected": -2.4962055683135986, "step": 6140 }, { "epoch": 0.25, "learning_rate": 4.682305193174524e-06, "logits/chosen": -3.0098862648010254, "logits/rejected": -3.036790132522583, "logps/chosen": -2.2537224292755127, "logps/rejected": -286.711181640625, "loss": 0.0648, "rewards/accuracies": 1.0, "rewards/chosen": 0.29384785890579224, "rewards/margins": 2.8035430908203125, "rewards/rejected": -2.509695053100586, "step": 6150 }, { "epoch": 0.25, "learning_rate": 4.680600115002109e-06, "logits/chosen": -2.9997994899749756, "logits/rejected": -3.0256645679473877, "logps/chosen": -3.8336727619171143, "logps/rejected": -285.7980041503906, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27772650122642517, "rewards/margins": 2.7843222618103027, "rewards/rejected": -2.5065958499908447, "step": 6160 }, { "epoch": 0.25, "learning_rate": 4.6788907856381895e-06, "logits/chosen": -3.003084897994995, "logits/rejected": -3.034090757369995, "logps/chosen": -3.481064558029175, "logps/rejected": -282.18267822265625, "loss": 0.0923, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2790561318397522, "rewards/margins": 2.7489213943481445, "rewards/rejected": -2.469865083694458, "step": 6170 }, { "epoch": 0.25, "learning_rate": 4.677177208415189e-06, "logits/chosen": -3.009286880493164, "logits/rejected": -3.036614179611206, "logps/chosen": -2.3697221279144287, "logps/rejected": -285.1207580566406, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": 0.2927015423774719, "rewards/margins": 2.7934563159942627, "rewards/rejected": -2.5007545948028564, "step": 6180 }, { "epoch": 0.25, "learning_rate": 4.675459386673815e-06, "logits/chosen": -3.0248451232910156, "logits/rejected": -3.0495171546936035, "logps/chosen": -7.918715476989746, "logps/rejected": -278.83599853515625, "loss": 0.1296, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23807068169116974, "rewards/margins": 2.6720824241638184, "rewards/rejected": -2.434011936187744, "step": 6190 }, { "epoch": 0.25, "learning_rate": 4.673737323763048e-06, "logits/chosen": -3.018551826477051, "logits/rejected": -3.0443015098571777, "logps/chosen": -7.419596195220947, "logps/rejected": -280.09820556640625, "loss": 0.1298, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24304521083831787, "rewards/margins": 2.6893317699432373, "rewards/rejected": -2.44628643989563, "step": 6200 }, { "epoch": 0.25, "eval_logits/chosen": -3.061109781265259, "eval_logits/rejected": -3.0851426124572754, "eval_logps/chosen": -0.10589097440242767, "eval_logps/rejected": -279.147216796875, "eval_loss": 0.06404805928468704, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.315540611743927, "eval_rewards/margins": 2.746857166290283, "eval_rewards/rejected": -2.431316614151001, "eval_runtime": 2.5448, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.393, "step": 6200 }, { "epoch": 0.25, "learning_rate": 4.6720110230401385e-06, "logits/chosen": -3.0084190368652344, "logits/rejected": -3.036289691925049, "logps/chosen": -6.796907901763916, "logps/rejected": -281.6875915527344, "loss": 0.1229, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25059252977371216, "rewards/margins": 2.711827516555786, "rewards/rejected": -2.4612350463867188, "step": 6210 }, { "epoch": 0.25, "learning_rate": 4.670280487870599e-06, "logits/chosen": -3.016592502593994, "logits/rejected": -3.0439326763153076, "logps/chosen": -4.364754676818848, "logps/rejected": -281.3330078125, "loss": 0.0806, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2754364609718323, "rewards/margins": 2.7326929569244385, "rewards/rejected": -2.45725679397583, "step": 6220 }, { "epoch": 0.25, "learning_rate": 4.668545721628194e-06, "logits/chosen": -3.0180370807647705, "logits/rejected": -3.04744291305542, "logps/chosen": -0.15483811497688293, "logps/rejected": -288.1318664550781, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": 0.31478825211524963, "rewards/margins": 2.8418424129486084, "rewards/rejected": -2.5270543098449707, "step": 6230 }, { "epoch": 0.25, "learning_rate": 4.666806727694942e-06, "logits/chosen": -3.0086419582366943, "logits/rejected": -3.0357518196105957, "logps/chosen": -1.8852885961532593, "logps/rejected": -287.8769836425781, "loss": 0.0607, "rewards/accuracies": 1.0, "rewards/chosen": 0.29880839586257935, "rewards/margins": 2.81880259513855, "rewards/rejected": -2.5199942588806152, "step": 6240 }, { "epoch": 0.25, "learning_rate": 4.665063509461098e-06, "logits/chosen": -3.017430305480957, "logits/rejected": -3.045067310333252, "logps/chosen": -8.580038070678711, "logps/rejected": -280.5375671386719, "loss": 0.1268, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2297937422990799, "rewards/margins": 2.6815834045410156, "rewards/rejected": -2.451789617538452, "step": 6250 }, { "epoch": 0.25, "learning_rate": 4.6633160703251556e-06, "logits/chosen": -3.0093045234680176, "logits/rejected": -3.040210008621216, "logps/chosen": -2.929779529571533, "logps/rejected": -272.77703857421875, "loss": 0.1019, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2853177487850189, "rewards/margins": 2.6673927307128906, "rewards/rejected": -2.382075071334839, "step": 6260 }, { "epoch": 0.25, "learning_rate": 4.661564413693838e-06, "logits/chosen": -2.9859235286712646, "logits/rejected": -3.016834020614624, "logps/chosen": -1.5738976001739502, "logps/rejected": -283.6581726074219, "loss": 0.0713, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3024364709854126, "rewards/margins": 2.7850441932678223, "rewards/rejected": -2.48260760307312, "step": 6270 }, { "epoch": 0.25, "learning_rate": 4.659808542982089e-06, "logits/chosen": -3.0347249507904053, "logits/rejected": -3.0628888607025146, "logps/chosen": -3.7066593170166016, "logps/rejected": -280.18450927734375, "loss": 0.0987, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2798413634300232, "rewards/margins": 2.7256503105163574, "rewards/rejected": -2.4458093643188477, "step": 6280 }, { "epoch": 0.25, "learning_rate": 4.658048461613068e-06, "logits/chosen": -2.986987352371216, "logits/rejected": -3.018256425857544, "logps/chosen": -6.52359676361084, "logps/rejected": -279.5649719238281, "loss": 0.1253, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24996843934059143, "rewards/margins": 2.690551280975342, "rewards/rejected": -2.4405829906463623, "step": 6290 }, { "epoch": 0.25, "learning_rate": 4.656284173018144e-06, "logits/chosen": -3.0210580825805664, "logits/rejected": -3.0511279106140137, "logps/chosen": -3.857705593109131, "logps/rejected": -279.33258056640625, "loss": 0.1037, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27991771697998047, "rewards/margins": 2.713872194290161, "rewards/rejected": -2.4339544773101807, "step": 6300 }, { "epoch": 0.25, "eval_logits/chosen": -3.058549642562866, "eval_logits/rejected": -3.088538885116577, "eval_logps/chosen": -0.12569785118103027, "eval_logps/rejected": -278.4085998535156, "eval_loss": 0.06484492123126984, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31534257531166077, "eval_rewards/margins": 2.7392725944519043, "eval_rewards/rejected": -2.4239304065704346, "eval_runtime": 2.5438, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 6300 }, { "epoch": 0.25, "learning_rate": 4.654515680636888e-06, "logits/chosen": -3.008592128753662, "logits/rejected": -3.0368857383728027, "logps/chosen": -7.3562726974487305, "logps/rejected": -272.8302917480469, "loss": 0.1397, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2449733316898346, "rewards/margins": 2.619210720062256, "rewards/rejected": -2.3742377758026123, "step": 6310 }, { "epoch": 0.25, "learning_rate": 4.652742987917066e-06, "logits/chosen": -3.0099687576293945, "logits/rejected": -3.0368614196777344, "logps/chosen": -3.63481068611145, "logps/rejected": -282.1846618652344, "loss": 0.0939, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2818365693092346, "rewards/margins": 2.744386672973633, "rewards/rejected": -2.462550163269043, "step": 6320 }, { "epoch": 0.25, "learning_rate": 4.6509660983146334e-06, "logits/chosen": -3.0003228187561035, "logits/rejected": -3.0278944969177246, "logps/chosen": -0.31081491708755493, "logps/rejected": -285.68414306640625, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": 0.31345346570014954, "rewards/margins": 2.8160018920898438, "rewards/rejected": -2.5025484561920166, "step": 6330 }, { "epoch": 0.25, "learning_rate": 4.649185015293728e-06, "logits/chosen": -3.016906261444092, "logits/rejected": -3.04288911819458, "logps/chosen": -4.2719221115112305, "logps/rejected": -282.2127685546875, "loss": 0.0889, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27283358573913574, "rewards/margins": 2.7400875091552734, "rewards/rejected": -2.4672536849975586, "step": 6340 }, { "epoch": 0.25, "learning_rate": 4.6473997423266615e-06, "logits/chosen": -2.965548515319824, "logits/rejected": -2.9972853660583496, "logps/chosen": -0.2615472376346588, "logps/rejected": -287.3536682128906, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": 0.3116239905357361, "rewards/margins": 2.832303524017334, "rewards/rejected": -2.520679473876953, "step": 6350 }, { "epoch": 0.25, "learning_rate": 4.645610282893914e-06, "logits/chosen": -3.023125410079956, "logits/rejected": -3.0512378215789795, "logps/chosen": -0.35184961557388306, "logps/rejected": -289.27264404296875, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.31310752034187317, "rewards/margins": 2.8519155979156494, "rewards/rejected": -2.5388081073760986, "step": 6360 }, { "epoch": 0.25, "learning_rate": 4.6438166404841316e-06, "logits/chosen": -3.022429943084717, "logits/rejected": -3.049954891204834, "logps/chosen": -0.28324180841445923, "logps/rejected": -288.33258056640625, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": 0.31336745619773865, "rewards/margins": 2.8419833183288574, "rewards/rejected": -2.528615951538086, "step": 6370 }, { "epoch": 0.26, "learning_rate": 4.642018818594107e-06, "logits/chosen": -2.99055552482605, "logits/rejected": -3.0165138244628906, "logps/chosen": -0.41787204146385193, "logps/rejected": -283.6003723144531, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": 0.31330057978630066, "rewards/margins": 2.7901909351348877, "rewards/rejected": -2.4768900871276855, "step": 6380 }, { "epoch": 0.26, "learning_rate": 4.640216820728791e-06, "logits/chosen": -3.041405439376831, "logits/rejected": -3.0680060386657715, "logps/chosen": -0.18768338859081268, "logps/rejected": -286.247314453125, "loss": 0.059, "rewards/accuracies": 1.0, "rewards/chosen": 0.3167031407356262, "rewards/margins": 2.8173158168792725, "rewards/rejected": -2.50061297416687, "step": 6390 }, { "epoch": 0.26, "learning_rate": 4.638410650401267e-06, "logits/chosen": -3.0214314460754395, "logits/rejected": -3.0471508502960205, "logps/chosen": -3.902657985687256, "logps/rejected": -284.2465515136719, "loss": 0.0939, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2793024182319641, "rewards/margins": 2.7627148628234863, "rewards/rejected": -2.483412265777588, "step": 6400 }, { "epoch": 0.26, "eval_logits/chosen": -3.0593981742858887, "eval_logits/rejected": -3.0841283798217773, "eval_logps/chosen": -0.1197052001953125, "eval_logps/rejected": -280.1843566894531, "eval_loss": 0.06327111274003983, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3154025077819824, "eval_rewards/margins": 2.7570903301239014, "eval_rewards/rejected": -2.441688060760498, "eval_runtime": 2.5425, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 6400 }, { "epoch": 0.26, "learning_rate": 4.636600311132758e-06, "logits/chosen": -2.971214771270752, "logits/rejected": -3.0009522438049316, "logps/chosen": -4.331908702850342, "logps/rejected": -283.17144775390625, "loss": 0.0943, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27049344778060913, "rewards/margins": 2.7557907104492188, "rewards/rejected": -2.485297679901123, "step": 6410 }, { "epoch": 0.26, "learning_rate": 4.634785806452613e-06, "logits/chosen": -3.0092828273773193, "logits/rejected": -3.0364737510681152, "logps/chosen": -3.0416293144226074, "logps/rejected": -280.84588623046875, "loss": 0.0887, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28421860933303833, "rewards/margins": 2.740111827850342, "rewards/rejected": -2.4558937549591064, "step": 6420 }, { "epoch": 0.26, "learning_rate": 4.632967139898301e-06, "logits/chosen": -2.9916412830352783, "logits/rejected": -3.0205843448638916, "logps/chosen": -2.6875481605529785, "logps/rejected": -280.43560791015625, "loss": 0.0879, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28969597816467285, "rewards/margins": 2.7420401573181152, "rewards/rejected": -2.4523441791534424, "step": 6430 }, { "epoch": 0.26, "learning_rate": 4.631144315015407e-06, "logits/chosen": -3.000091791152954, "logits/rejected": -3.028843402862549, "logps/chosen": -4.87392520904541, "logps/rejected": -280.8268737792969, "loss": 0.1063, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2694644331932068, "rewards/margins": 2.7221717834472656, "rewards/rejected": -2.4527077674865723, "step": 6440 }, { "epoch": 0.26, "learning_rate": 4.62931733535762e-06, "logits/chosen": -3.0071728229522705, "logits/rejected": -3.0364155769348145, "logps/chosen": -0.18047362565994263, "logps/rejected": -289.7478942871094, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31651997566223145, "rewards/margins": 2.8529515266418457, "rewards/rejected": -2.5364317893981934, "step": 6450 }, { "epoch": 0.26, "learning_rate": 4.62748620448673e-06, "logits/chosen": -3.032928943634033, "logits/rejected": -3.059993267059326, "logps/chosen": -0.2091582715511322, "logps/rejected": -288.98602294921875, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.3169783651828766, "rewards/margins": 2.8473918437957764, "rewards/rejected": -2.5304136276245117, "step": 6460 }, { "epoch": 0.26, "learning_rate": 4.625650925972622e-06, "logits/chosen": -3.021629571914673, "logits/rejected": -3.050288200378418, "logps/chosen": -1.7067201137542725, "logps/rejected": -286.3426818847656, "loss": 0.0676, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29848068952560425, "rewards/margins": 2.810861587524414, "rewards/rejected": -2.512381076812744, "step": 6470 }, { "epoch": 0.26, "learning_rate": 4.623811503393264e-06, "logits/chosen": -3.0363211631774902, "logits/rejected": -3.064572811126709, "logps/chosen": -0.25657105445861816, "logps/rejected": -288.7880554199219, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.31602928042411804, "rewards/margins": 2.8470265865325928, "rewards/rejected": -2.5309975147247314, "step": 6480 }, { "epoch": 0.26, "learning_rate": 4.621967940334705e-06, "logits/chosen": -3.0149784088134766, "logits/rejected": -3.0403599739074707, "logps/chosen": -5.408601760864258, "logps/rejected": -282.6549377441406, "loss": 0.1002, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26551857590675354, "rewards/margins": 2.7344400882720947, "rewards/rejected": -2.468921422958374, "step": 6490 }, { "epoch": 0.26, "learning_rate": 4.620120240391065e-06, "logits/chosen": -3.0360028743743896, "logits/rejected": -3.060819149017334, "logps/chosen": -1.1530096530914307, "logps/rejected": -288.6084289550781, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": 0.3049672245979309, "rewards/margins": 2.8375821113586426, "rewards/rejected": -2.5326149463653564, "step": 6500 }, { "epoch": 0.26, "eval_logits/chosen": -3.0580432415008545, "eval_logits/rejected": -3.0841403007507324, "eval_logps/chosen": -0.14157958328723907, "eval_logps/rejected": -280.0093078613281, "eval_loss": 0.0635337233543396, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31518375873565674, "eval_rewards/margins": 2.7551209926605225, "eval_rewards/rejected": -2.4399373531341553, "eval_runtime": 2.5423, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 6500 }, { "epoch": 0.26, "learning_rate": 4.618268407164531e-06, "logits/chosen": -3.008301019668579, "logits/rejected": -3.0357134342193604, "logps/chosen": -3.8357245922088623, "logps/rejected": -284.50372314453125, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27991604804992676, "rewards/margins": 2.7723469734191895, "rewards/rejected": -2.4924309253692627, "step": 6510 }, { "epoch": 0.26, "learning_rate": 4.616412444265344e-06, "logits/chosen": -3.002413749694824, "logits/rejected": -3.033297538757324, "logps/chosen": -0.2161935269832611, "logps/rejected": -289.68280029296875, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.3128521740436554, "rewards/margins": 2.8541409969329834, "rewards/rejected": -2.5412888526916504, "step": 6520 }, { "epoch": 0.26, "learning_rate": 4.614552355311802e-06, "logits/chosen": -3.019195795059204, "logits/rejected": -3.0474305152893066, "logps/chosen": -6.359457015991211, "logps/rejected": -280.3211975097656, "loss": 0.1188, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25419747829437256, "rewards/margins": 2.6996140480041504, "rewards/rejected": -2.4454166889190674, "step": 6530 }, { "epoch": 0.26, "learning_rate": 4.612688143930242e-06, "logits/chosen": -3.000709056854248, "logits/rejected": -3.030503749847412, "logps/chosen": -0.2782098054885864, "logps/rejected": -285.7286376953125, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": 0.3127882480621338, "rewards/margins": 2.8160195350646973, "rewards/rejected": -2.5032312870025635, "step": 6540 }, { "epoch": 0.26, "learning_rate": 4.610819813755038e-06, "logits/chosen": -3.018059253692627, "logits/rejected": -3.0461840629577637, "logps/chosen": -4.300431728363037, "logps/rejected": -283.1270446777344, "loss": 0.0959, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27324149012565613, "rewards/margins": 2.7505016326904297, "rewards/rejected": -2.477260112762451, "step": 6550 }, { "epoch": 0.26, "learning_rate": 4.608947368428598e-06, "logits/chosen": -3.038865566253662, "logits/rejected": -3.064649820327759, "logps/chosen": -7.673970699310303, "logps/rejected": -282.6482849121094, "loss": 0.123, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24225036799907684, "rewards/margins": 2.712185859680176, "rewards/rejected": -2.469935178756714, "step": 6560 }, { "epoch": 0.26, "learning_rate": 4.607070811601347e-06, "logits/chosen": -3.016500949859619, "logits/rejected": -3.0473155975341797, "logps/chosen": -2.3012547492980957, "logps/rejected": -282.8114013671875, "loss": 0.0787, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2908532917499542, "rewards/margins": 2.765131711959839, "rewards/rejected": -2.474278211593628, "step": 6570 }, { "epoch": 0.26, "learning_rate": 4.605190146931731e-06, "logits/chosen": -3.0209665298461914, "logits/rejected": -3.0495917797088623, "logps/chosen": -0.19721418619155884, "logps/rejected": -290.0712890625, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148941397666931, "rewards/margins": 2.8592689037323, "rewards/rejected": -2.54437518119812, "step": 6580 }, { "epoch": 0.26, "learning_rate": 4.603305378086201e-06, "logits/chosen": -3.003058910369873, "logits/rejected": -3.0336177349090576, "logps/chosen": -7.116499423980713, "logps/rejected": -281.48345947265625, "loss": 0.1263, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24550724029541016, "rewards/margins": 2.7094414234161377, "rewards/rejected": -2.4639339447021484, "step": 6590 }, { "epoch": 0.26, "learning_rate": 4.601416508739211e-06, "logits/chosen": -3.0210776329040527, "logits/rejected": -3.050705671310425, "logps/chosen": -4.873332500457764, "logps/rejected": -280.88787841796875, "loss": 0.1025, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2677438259124756, "rewards/margins": 2.7219443321228027, "rewards/rejected": -2.4542009830474854, "step": 6600 }, { "epoch": 0.26, "eval_logits/chosen": -3.0576181411743164, "eval_logits/rejected": -3.08673357963562, "eval_logps/chosen": -0.2315964698791504, "eval_logps/rejected": -279.7302551269531, "eval_loss": 0.06366094201803207, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.314283549785614, "eval_rewards/margins": 2.7514305114746094, "eval_rewards/rejected": -2.4371466636657715, "eval_runtime": 2.5368, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 6600 }, { "epoch": 0.26, "learning_rate": 4.599523542573207e-06, "logits/chosen": -3.0423781871795654, "logits/rejected": -3.0676662921905518, "logps/chosen": -6.8021087646484375, "logps/rejected": -281.2986755371094, "loss": 0.1225, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25134187936782837, "rewards/margins": 2.7112858295440674, "rewards/rejected": -2.459944248199463, "step": 6610 }, { "epoch": 0.26, "learning_rate": 4.597626483278626e-06, "logits/chosen": -3.0063529014587402, "logits/rejected": -3.03131103515625, "logps/chosen": -5.844998359680176, "logps/rejected": -283.81884765625, "loss": 0.1011, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2581987977027893, "rewards/margins": 2.741429328918457, "rewards/rejected": -2.4832301139831543, "step": 6620 }, { "epoch": 0.27, "learning_rate": 4.595725334553879e-06, "logits/chosen": -2.9841206073760986, "logits/rejected": -3.006610155105591, "logps/chosen": -0.2933168113231659, "logps/rejected": -288.637939453125, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.3134903013706207, "rewards/margins": 2.8447279930114746, "rewards/rejected": -2.531238079071045, "step": 6630 }, { "epoch": 0.27, "learning_rate": 4.593820100105355e-06, "logits/chosen": -3.0006508827209473, "logits/rejected": -3.0269477367401123, "logps/chosen": -4.534999370574951, "logps/rejected": -284.52679443359375, "loss": 0.0923, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2707737386226654, "rewards/margins": 2.7603721618652344, "rewards/rejected": -2.489598274230957, "step": 6640 }, { "epoch": 0.27, "learning_rate": 4.591910783647405e-06, "logits/chosen": -3.0126566886901855, "logits/rejected": -3.040555238723755, "logps/chosen": -7.418600559234619, "logps/rejected": -279.11614990234375, "loss": 0.1265, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24456961452960968, "rewards/margins": 2.6782402992248535, "rewards/rejected": -2.4336705207824707, "step": 6650 }, { "epoch": 0.27, "learning_rate": 4.589997388902339e-06, "logits/chosen": -2.998840808868408, "logits/rejected": -3.0292696952819824, "logps/chosen": -4.150060176849365, "logps/rejected": -278.82330322265625, "loss": 0.1019, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2746580243110657, "rewards/margins": 2.71020245552063, "rewards/rejected": -2.435544490814209, "step": 6660 }, { "epoch": 0.27, "learning_rate": 4.588079919600419e-06, "logits/chosen": -3.001171588897705, "logits/rejected": -3.0281472206115723, "logps/chosen": -6.637002468109131, "logps/rejected": -279.87664794921875, "loss": 0.118, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24818451702594757, "rewards/margins": 2.6928000450134277, "rewards/rejected": -2.4446158409118652, "step": 6670 }, { "epoch": 0.27, "learning_rate": 4.586158379479848e-06, "logits/chosen": -3.0349209308624268, "logits/rejected": -3.05918025970459, "logps/chosen": -4.077338218688965, "logps/rejected": -282.5894775390625, "loss": 0.098, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27888673543930054, "rewards/margins": 2.747860908508301, "rewards/rejected": -2.4689738750457764, "step": 6680 }, { "epoch": 0.27, "learning_rate": 4.584232772286769e-06, "logits/chosen": -3.0220611095428467, "logits/rejected": -3.045246124267578, "logps/chosen": -7.725103855133057, "logps/rejected": -276.64093017578125, "loss": 0.1306, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23994675278663635, "rewards/margins": 2.6500320434570312, "rewards/rejected": -2.4100852012634277, "step": 6690 }, { "epoch": 0.27, "learning_rate": 4.582303101775249e-06, "logits/chosen": -3.0214786529541016, "logits/rejected": -3.0454986095428467, "logps/chosen": -0.54316246509552, "logps/rejected": -287.7095642089844, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": 0.31050291657447815, "rewards/margins": 2.8365464210510254, "rewards/rejected": -2.52604341506958, "step": 6700 }, { "epoch": 0.27, "eval_logits/chosen": -3.0596139430999756, "eval_logits/rejected": -3.0833640098571777, "eval_logps/chosen": -0.10917438566684723, "eval_logps/rejected": -279.8492431640625, "eval_loss": 0.06363450735807419, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31550776958465576, "eval_rewards/margins": 2.7538444995880127, "eval_rewards/rejected": -2.4383366107940674, "eval_runtime": 2.5401, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 6700 }, { "epoch": 0.27, "learning_rate": 4.580369371707282e-06, "logits/chosen": -2.970688581466675, "logits/rejected": -3.003121852874756, "logps/chosen": -3.6678359508514404, "logps/rejected": -281.94403076171875, "loss": 0.0942, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27914127707481384, "rewards/margins": 2.7396111488342285, "rewards/rejected": -2.4604694843292236, "step": 6710 }, { "epoch": 0.27, "learning_rate": 4.578431585852771e-06, "logits/chosen": -3.023127555847168, "logits/rejected": -3.051640033721924, "logps/chosen": -0.3901863694190979, "logps/rejected": -287.8932800292969, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 0.3134520947933197, "rewards/margins": 2.837815761566162, "rewards/rejected": -2.5243637561798096, "step": 6720 }, { "epoch": 0.27, "learning_rate": 4.576489747989532e-06, "logits/chosen": -2.9918558597564697, "logits/rejected": -3.0195751190185547, "logps/chosen": -8.89402961730957, "logps/rejected": -272.949951171875, "loss": 0.148, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22808535397052765, "rewards/margins": 2.600950241088867, "rewards/rejected": -2.3728652000427246, "step": 6730 }, { "epoch": 0.27, "learning_rate": 4.574543861903275e-06, "logits/chosen": -3.004214286804199, "logits/rejected": -3.0337300300598145, "logps/chosen": -4.844534873962402, "logps/rejected": -274.57098388671875, "loss": 0.1139, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26712751388549805, "rewards/margins": 2.6615548133850098, "rewards/rejected": -2.39442777633667, "step": 6740 }, { "epoch": 0.27, "learning_rate": 4.572593931387604e-06, "logits/chosen": -2.995190143585205, "logits/rejected": -3.0273940563201904, "logps/chosen": -3.5143814086914062, "logps/rejected": -273.75177001953125, "loss": 0.1078, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28309378027915955, "rewards/margins": 2.6639952659606934, "rewards/rejected": -2.38090181350708, "step": 6750 }, { "epoch": 0.27, "learning_rate": 4.570639960244011e-06, "logits/chosen": -2.991973876953125, "logits/rejected": -3.0230367183685303, "logps/chosen": -1.2457488775253296, "logps/rejected": -281.76409912109375, "loss": 0.0704, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.301772803068161, "rewards/margins": 2.768230438232422, "rewards/rejected": -2.4664576053619385, "step": 6760 }, { "epoch": 0.27, "learning_rate": 4.56868195228186e-06, "logits/chosen": -3.015252113342285, "logits/rejected": -3.043281078338623, "logps/chosen": -5.3604736328125, "logps/rejected": -280.75543212890625, "loss": 0.0991, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26182159781455994, "rewards/margins": 2.715688705444336, "rewards/rejected": -2.453867197036743, "step": 6770 }, { "epoch": 0.27, "learning_rate": 4.566719911318389e-06, "logits/chosen": -3.013939380645752, "logits/rejected": -3.0368525981903076, "logps/chosen": -3.8510475158691406, "logps/rejected": -280.4809265136719, "loss": 0.0959, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2779620289802551, "rewards/margins": 2.7260355949401855, "rewards/rejected": -2.448073625564575, "step": 6780 }, { "epoch": 0.27, "learning_rate": 4.5647538411786965e-06, "logits/chosen": -3.0370357036590576, "logits/rejected": -3.0619635581970215, "logps/chosen": -3.2376952171325684, "logps/rejected": -283.14715576171875, "loss": 0.0877, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2839028239250183, "rewards/margins": 2.763650417327881, "rewards/rejected": -2.4797472953796387, "step": 6790 }, { "epoch": 0.27, "learning_rate": 4.562783745695738e-06, "logits/chosen": -3.005134105682373, "logits/rejected": -3.0314605236053467, "logps/chosen": -2.9112584590911865, "logps/rejected": -279.8175964355469, "loss": 0.0882, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2889579236507416, "rewards/margins": 2.7350525856018066, "rewards/rejected": -2.446094512939453, "step": 6800 }, { "epoch": 0.27, "eval_logits/chosen": -3.0619125366210938, "eval_logits/rejected": -3.085845947265625, "eval_logps/chosen": -0.11869156360626221, "eval_logps/rejected": -280.780517578125, "eval_loss": 0.0629551038146019, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31541261076927185, "eval_rewards/margins": 2.7630622386932373, "eval_rewards/rejected": -2.4476494789123535, "eval_runtime": 2.5308, "eval_samples_per_second": 1.976, "eval_steps_per_second": 0.395, "step": 6800 }, { "epoch": 0.27, "learning_rate": 4.560809628710315e-06, "logits/chosen": -3.0128471851348877, "logits/rejected": -3.038167953491211, "logps/chosen": -0.3351779580116272, "logps/rejected": -287.49761962890625, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": 0.3145189881324768, "rewards/margins": 2.8339476585388184, "rewards/rejected": -2.5194289684295654, "step": 6810 }, { "epoch": 0.27, "learning_rate": 4.558831494071069e-06, "logits/chosen": -3.0464439392089844, "logits/rejected": -3.0720038414001465, "logps/chosen": -0.20990052819252014, "logps/rejected": -290.83856201171875, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148346543312073, "rewards/margins": 2.8706252574920654, "rewards/rejected": -2.555790424346924, "step": 6820 }, { "epoch": 0.27, "learning_rate": 4.556849345634475e-06, "logits/chosen": -2.9906082153320312, "logits/rejected": -3.0176265239715576, "logps/chosen": -1.667474389076233, "logps/rejected": -286.21453857421875, "loss": 0.07, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3003891706466675, "rewards/margins": 2.8098502159118652, "rewards/rejected": -2.509460926055908, "step": 6830 }, { "epoch": 0.27, "learning_rate": 4.554863187264833e-06, "logits/chosen": -3.00770902633667, "logits/rejected": -3.0389668941497803, "logps/chosen": -6.453503608703613, "logps/rejected": -282.27325439453125, "loss": 0.1197, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24986648559570312, "rewards/margins": 2.719614028930664, "rewards/rejected": -2.469748020172119, "step": 6840 }, { "epoch": 0.27, "learning_rate": 4.55287302283426e-06, "logits/chosen": -3.013366222381592, "logits/rejected": -3.0454649925231934, "logps/chosen": -3.901597499847412, "logps/rejected": -286.3304748535156, "loss": 0.0926, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27675071358680725, "rewards/margins": 2.7881510257720947, "rewards/rejected": -2.5114002227783203, "step": 6850 }, { "epoch": 0.27, "learning_rate": 4.550878856222684e-06, "logits/chosen": -2.992875099182129, "logits/rejected": -3.024229049682617, "logps/chosen": -0.1883603036403656, "logps/rejected": -284.67529296875, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": 0.31458568572998047, "rewards/margins": 2.806424856185913, "rewards/rejected": -2.4918391704559326, "step": 6860 }, { "epoch": 0.27, "learning_rate": 4.548880691317835e-06, "logits/chosen": -2.999845027923584, "logits/rejected": -3.0314955711364746, "logps/chosen": -1.5009804964065552, "logps/rejected": -283.96185302734375, "loss": 0.0729, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30182409286499023, "rewards/margins": 2.7854840755462646, "rewards/rejected": -2.4836599826812744, "step": 6870 }, { "epoch": 0.28, "learning_rate": 4.5468785320152365e-06, "logits/chosen": -3.025071382522583, "logits/rejected": -3.0538582801818848, "logps/chosen": -3.7889411449432373, "logps/rejected": -283.7013244628906, "loss": 0.0938, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2773628234863281, "rewards/margins": 2.7619261741638184, "rewards/rejected": -2.4845633506774902, "step": 6880 }, { "epoch": 0.28, "learning_rate": 4.544872382218202e-06, "logits/chosen": -3.0113229751586914, "logits/rejected": -3.0414161682128906, "logps/chosen": -6.692858695983887, "logps/rejected": -281.5845031738281, "loss": 0.1215, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25102904438972473, "rewards/margins": 2.712921619415283, "rewards/rejected": -2.4618923664093018, "step": 6890 }, { "epoch": 0.28, "learning_rate": 4.542862245837821e-06, "logits/chosen": -3.0111260414123535, "logits/rejected": -3.0407631397247314, "logps/chosen": -2.9177165031433105, "logps/rejected": -286.0294189453125, "loss": 0.0744, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.287582129240036, "rewards/margins": 2.794233560562134, "rewards/rejected": -2.5066514015197754, "step": 6900 }, { "epoch": 0.28, "eval_logits/chosen": -3.058797597885132, "eval_logits/rejected": -3.0874199867248535, "eval_logps/chosen": -0.13580511510372162, "eval_logps/rejected": -279.90325927734375, "eval_loss": 0.06373264640569687, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3152414858341217, "eval_rewards/margins": 2.7541184425354004, "eval_rewards/rejected": -2.4388771057128906, "eval_runtime": 2.5356, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 6900 }, { "epoch": 0.28, "learning_rate": 4.54084812679296e-06, "logits/chosen": -2.97629976272583, "logits/rejected": -3.004467487335205, "logps/chosen": -3.1401238441467285, "logps/rejected": -284.19891357421875, "loss": 0.0865, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28438252210617065, "rewards/margins": 2.7775301933288574, "rewards/rejected": -2.493147850036621, "step": 6910 }, { "epoch": 0.28, "learning_rate": 4.538830029010246e-06, "logits/chosen": -3.0038483142852783, "logits/rejected": -3.0333049297332764, "logps/chosen": -0.26084744930267334, "logps/rejected": -288.04705810546875, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.31336578726768494, "rewards/margins": 2.842864990234375, "rewards/rejected": -2.529499053955078, "step": 6920 }, { "epoch": 0.28, "learning_rate": 4.536807956424063e-06, "logits/chosen": -3.0073115825653076, "logits/rejected": -3.0346827507019043, "logps/chosen": -2.3058323860168457, "logps/rejected": -287.3481140136719, "loss": 0.0607, "rewards/accuracies": 1.0, "rewards/chosen": 0.29443809390068054, "rewards/margins": 2.816817283630371, "rewards/rejected": -2.522379159927368, "step": 6930 }, { "epoch": 0.28, "learning_rate": 4.534781912976546e-06, "logits/chosen": -2.9978833198547363, "logits/rejected": -3.0282530784606934, "logps/chosen": -3.077340602874756, "logps/rejected": -285.6322937011719, "loss": 0.0855, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.285012811422348, "rewards/margins": 2.7881267070770264, "rewards/rejected": -2.5031135082244873, "step": 6940 }, { "epoch": 0.28, "learning_rate": 4.5327519026175694e-06, "logits/chosen": -3.0277256965637207, "logits/rejected": -3.053220272064209, "logps/chosen": -8.077451705932617, "logps/rejected": -276.1784362792969, "loss": 0.1392, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23878350853919983, "rewards/margins": 2.6390674114227295, "rewards/rejected": -2.4002840518951416, "step": 6950 }, { "epoch": 0.28, "learning_rate": 4.530717929304743e-06, "logits/chosen": -2.9921839237213135, "logits/rejected": -3.021695852279663, "logps/chosen": -0.2777920365333557, "logps/rejected": -288.0277099609375, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.31322532892227173, "rewards/margins": 2.8383443355560303, "rewards/rejected": -2.5251190662384033, "step": 6960 }, { "epoch": 0.28, "learning_rate": 4.528679997003403e-06, "logits/chosen": -2.997366189956665, "logits/rejected": -3.023822069168091, "logps/chosen": -3.908905029296875, "logps/rejected": -284.0055847167969, "loss": 0.0978, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27836373448371887, "rewards/margins": 2.764639377593994, "rewards/rejected": -2.4862751960754395, "step": 6970 }, { "epoch": 0.28, "learning_rate": 4.5266381096866e-06, "logits/chosen": -3.0069851875305176, "logits/rejected": -3.0321781635284424, "logps/chosen": -2.1796698570251465, "logps/rejected": -284.2392883300781, "loss": 0.072, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29502779245376587, "rewards/margins": 2.7843756675720215, "rewards/rejected": -2.4893479347229004, "step": 6980 }, { "epoch": 0.28, "learning_rate": 4.5245922713351e-06, "logits/chosen": -3.037476062774658, "logits/rejected": -3.0604825019836426, "logps/chosen": -1.1474509239196777, "logps/rejected": -286.1682434082031, "loss": 0.0612, "rewards/accuracies": 1.0, "rewards/chosen": 0.30597612261772156, "rewards/margins": 2.8115992546081543, "rewards/rejected": -2.5056228637695312, "step": 6990 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -2.993351936340332, "logits/rejected": -3.018378257751465, "logps/chosen": -0.3587896227836609, "logps/rejected": -290.99993896484375, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31539440155029297, "rewards/margins": 2.8674123287200928, "rewards/rejected": -2.552018165588379, "step": 7000 }, { "epoch": 0.28, "eval_logits/chosen": -3.0640206336975098, "eval_logits/rejected": -3.0869619846343994, "eval_logps/chosen": -0.1258697509765625, "eval_logps/rejected": -280.5066833496094, "eval_loss": 0.0632161870598793, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3153408467769623, "eval_rewards/margins": 2.760251998901367, "eval_rewards/rejected": -2.444911003112793, "eval_runtime": 2.5371, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 7000 }, { "epoch": 0.28, "learning_rate": 4.520488757489568e-06, "logits/chosen": -3.029526948928833, "logits/rejected": -3.054647922515869, "logps/chosen": -2.2799346446990967, "logps/rejected": -284.8070373535156, "loss": 0.0782, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2938650846481323, "rewards/margins": 2.7867226600646973, "rewards/rejected": -2.4928574562072754, "step": 7010 }, { "epoch": 0.28, "learning_rate": 4.518431089995546e-06, "logits/chosen": -3.014768600463867, "logits/rejected": -3.039738893508911, "logps/chosen": -1.7345783710479736, "logps/rejected": -286.7563781738281, "loss": 0.0628, "rewards/accuracies": 1.0, "rewards/chosen": 0.30165985226631165, "rewards/margins": 2.8129220008850098, "rewards/rejected": -2.5112624168395996, "step": 7020 }, { "epoch": 0.28, "learning_rate": 4.516369487466832e-06, "logits/chosen": -3.013983964920044, "logits/rejected": -3.0413196086883545, "logps/chosen": -9.848876953125, "logps/rejected": -276.368896484375, "loss": 0.1548, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21944260597229004, "rewards/margins": 2.630829334259033, "rewards/rejected": -2.4113872051239014, "step": 7030 }, { "epoch": 0.28, "learning_rate": 4.514303953922623e-06, "logits/chosen": -3.036357879638672, "logits/rejected": -3.0624608993530273, "logps/chosen": -5.584227085113525, "logps/rejected": -284.2513122558594, "loss": 0.0971, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.25997811555862427, "rewards/margins": 2.747621774673462, "rewards/rejected": -2.4876437187194824, "step": 7040 }, { "epoch": 0.28, "learning_rate": 4.512234493389785e-06, "logits/chosen": -3.0366339683532715, "logits/rejected": -3.060683250427246, "logps/chosen": -6.613545894622803, "logps/rejected": -281.32928466796875, "loss": 0.1214, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2496412992477417, "rewards/margins": 2.7128939628601074, "rewards/rejected": -2.463252544403076, "step": 7050 }, { "epoch": 0.28, "learning_rate": 4.510161109902837e-06, "logits/chosen": -3.0283141136169434, "logits/rejected": -3.0521349906921387, "logps/chosen": -6.965964317321777, "logps/rejected": -273.34124755859375, "loss": 0.1281, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24613972008228302, "rewards/margins": 2.6251041889190674, "rewards/rejected": -2.378964424133301, "step": 7060 }, { "epoch": 0.28, "learning_rate": 4.508083807503945e-06, "logits/chosen": -3.001467227935791, "logits/rejected": -3.029003620147705, "logps/chosen": -2.9641950130462646, "logps/rejected": -286.9776611328125, "loss": 0.0814, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2880168557167053, "rewards/margins": 2.8017866611480713, "rewards/rejected": -2.5137698650360107, "step": 7070 }, { "epoch": 0.28, "learning_rate": 4.506002590242917e-06, "logits/chosen": -2.989046573638916, "logits/rejected": -3.0161404609680176, "logps/chosen": -17.723630905151367, "logps/rejected": -276.9977111816406, "loss": 0.1543, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.1378140151500702, "rewards/margins": 2.555187702178955, "rewards/rejected": -2.4173736572265625, "step": 7080 }, { "epoch": 0.28, "learning_rate": 4.503917462177192e-06, "logits/chosen": -3.0192291736602783, "logits/rejected": -3.04876708984375, "logps/chosen": -5.990804672241211, "logps/rejected": -279.95068359375, "loss": 0.1153, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2583114504814148, "rewards/margins": 2.704345226287842, "rewards/rejected": -2.4460337162017822, "step": 7090 }, { "epoch": 0.28, "learning_rate": 4.501828427371834e-06, "logits/chosen": -3.015270709991455, "logits/rejected": -3.043081045150757, "logps/chosen": -2.2254786491394043, "logps/rejected": -287.16571044921875, "loss": 0.0745, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29620710015296936, "rewards/margins": 2.8078153133392334, "rewards/rejected": -2.511608362197876, "step": 7100 }, { "epoch": 0.28, "eval_logits/chosen": -3.059053421020508, "eval_logits/rejected": -3.0863683223724365, "eval_logps/chosen": -0.17820331454277039, "eval_logps/rejected": -279.1595458984375, "eval_loss": 0.06439804285764694, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31481748819351196, "eval_rewards/margins": 2.7462573051452637, "eval_rewards/rejected": -2.4314398765563965, "eval_runtime": 2.5359, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 7100 }, { "epoch": 0.28, "learning_rate": 4.499735489899524e-06, "logits/chosen": -3.0352320671081543, "logits/rejected": -3.0632450580596924, "logps/chosen": -0.2230338603258133, "logps/rejected": -286.24725341796875, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153129816055298, "rewards/margins": 2.820810317993164, "rewards/rejected": -2.505497694015503, "step": 7110 }, { "epoch": 0.28, "learning_rate": 4.49763865384055e-06, "logits/chosen": -3.017930507659912, "logits/rejected": -3.0442910194396973, "logps/chosen": -6.693168640136719, "logps/rejected": -279.8658752441406, "loss": 0.1227, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.251553475856781, "rewards/margins": 2.6944210529327393, "rewards/rejected": -2.4428677558898926, "step": 7120 }, { "epoch": 0.29, "learning_rate": 4.4955379232828014e-06, "logits/chosen": -2.989745616912842, "logits/rejected": -3.016331195831299, "logps/chosen": -3.042865037918091, "logps/rejected": -283.3672790527344, "loss": 0.0848, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2873920798301697, "rewards/margins": 2.7668118476867676, "rewards/rejected": -2.4794199466705322, "step": 7130 }, { "epoch": 0.29, "learning_rate": 4.493433302321759e-06, "logits/chosen": -3.0080442428588867, "logits/rejected": -3.036439895629883, "logps/chosen": -0.20523953437805176, "logps/rejected": -290.3081970214844, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150704503059387, "rewards/margins": 2.8643691539764404, "rewards/rejected": -2.5492987632751465, "step": 7140 }, { "epoch": 0.29, "learning_rate": 4.491324795060491e-06, "logits/chosen": -3.0043129920959473, "logits/rejected": -3.0311119556427, "logps/chosen": -3.5831406116485596, "logps/rejected": -281.39794921875, "loss": 0.0878, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2834619879722595, "rewards/margins": 2.739326238632202, "rewards/rejected": -2.455864429473877, "step": 7150 }, { "epoch": 0.29, "learning_rate": 4.4892124056096386e-06, "logits/chosen": -3.022658348083496, "logits/rejected": -3.0498178005218506, "logps/chosen": -0.3606303334236145, "logps/rejected": -289.09197998046875, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.3142373561859131, "rewards/margins": 2.8456130027770996, "rewards/rejected": -2.5313754081726074, "step": 7160 }, { "epoch": 0.29, "learning_rate": 4.487096138087415e-06, "logits/chosen": -3.031001091003418, "logits/rejected": -3.0544350147247314, "logps/chosen": -9.419936180114746, "logps/rejected": -278.2541198730469, "loss": 0.1464, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2265067845582962, "rewards/margins": 2.6494622230529785, "rewards/rejected": -2.4229555130004883, "step": 7170 }, { "epoch": 0.29, "learning_rate": 4.4849759966195885e-06, "logits/chosen": -3.012423276901245, "logits/rejected": -3.038511276245117, "logps/chosen": -3.9333062171936035, "logps/rejected": -276.35107421875, "loss": 0.1036, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27518853545188904, "rewards/margins": 2.6876230239868164, "rewards/rejected": -2.4124343395233154, "step": 7180 }, { "epoch": 0.29, "learning_rate": 4.482851985339487e-06, "logits/chosen": -3.026526927947998, "logits/rejected": -3.053168535232544, "logps/chosen": -6.699531555175781, "logps/rejected": -280.8331604003906, "loss": 0.1224, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25147101283073425, "rewards/margins": 2.704495906829834, "rewards/rejected": -2.4530253410339355, "step": 7190 }, { "epoch": 0.29, "learning_rate": 4.4807241083879774e-06, "logits/chosen": -3.014824390411377, "logits/rejected": -3.0477547645568848, "logps/chosen": -5.568627834320068, "logps/rejected": -280.56005859375, "loss": 0.112, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2593812346458435, "rewards/margins": 2.7166144847869873, "rewards/rejected": -2.457233428955078, "step": 7200 }, { "epoch": 0.29, "eval_logits/chosen": -3.0601236820220947, "eval_logits/rejected": -3.0869953632354736, "eval_logps/chosen": -0.1356256902217865, "eval_logps/rejected": -280.36578369140625, "eval_loss": 0.06335752457380295, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31524327397346497, "eval_rewards/margins": 2.7587454319000244, "eval_rewards/rejected": -2.443502426147461, "eval_runtime": 2.5403, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 7200 }, { "epoch": 0.29, "learning_rate": 4.478592369913464e-06, "logits/chosen": -3.013762950897217, "logits/rejected": -3.0422451496124268, "logps/chosen": -10.866480827331543, "logps/rejected": -277.2164306640625, "loss": 0.1635, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.20901116728782654, "rewards/margins": 2.629044532775879, "rewards/rejected": -2.4200334548950195, "step": 7210 }, { "epoch": 0.29, "learning_rate": 4.476456774071883e-06, "logits/chosen": -3.005096197128296, "logits/rejected": -3.0299594402313232, "logps/chosen": -7.313652992248535, "logps/rejected": -276.2112731933594, "loss": 0.1296, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24298541247844696, "rewards/margins": 2.653797149658203, "rewards/rejected": -2.41081166267395, "step": 7220 }, { "epoch": 0.29, "learning_rate": 4.474317325026685e-06, "logits/chosen": -3.0358126163482666, "logits/rejected": -3.0621910095214844, "logps/chosen": -9.312654495239258, "logps/rejected": -278.196044921875, "loss": 0.1315, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.224706768989563, "rewards/margins": 2.6522629261016846, "rewards/rejected": -2.427556037902832, "step": 7230 }, { "epoch": 0.29, "learning_rate": 4.472174026948836e-06, "logits/chosen": -3.042165756225586, "logits/rejected": -3.069568157196045, "logps/chosen": -0.23928102850914001, "logps/rejected": -285.7279052734375, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": 0.3112562596797943, "rewards/margins": 2.815361261367798, "rewards/rejected": -2.5041048526763916, "step": 7240 }, { "epoch": 0.29, "learning_rate": 4.470026884016805e-06, "logits/chosen": -3.0385591983795166, "logits/rejected": -3.065636157989502, "logps/chosen": -3.0345699787139893, "logps/rejected": -285.69146728515625, "loss": 0.0714, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28741681575775146, "rewards/margins": 2.7913849353790283, "rewards/rejected": -2.5039682388305664, "step": 7250 }, { "epoch": 0.29, "learning_rate": 4.467875900416558e-06, "logits/chosen": -3.0161733627319336, "logits/rejected": -3.0404465198516846, "logps/chosen": -3.702500581741333, "logps/rejected": -282.15869140625, "loss": 0.0937, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28014296293258667, "rewards/margins": 2.7461647987365723, "rewards/rejected": -2.466021776199341, "step": 7260 }, { "epoch": 0.29, "learning_rate": 4.465721080341547e-06, "logits/chosen": -3.0049099922180176, "logits/rejected": -3.033806324005127, "logps/chosen": -0.2608969211578369, "logps/rejected": -287.6667785644531, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": 0.3142232298851013, "rewards/margins": 2.836409091949463, "rewards/rejected": -2.522185802459717, "step": 7270 }, { "epoch": 0.29, "learning_rate": 4.463562427992705e-06, "logits/chosen": -3.009425401687622, "logits/rejected": -3.0369558334350586, "logps/chosen": -2.9739537239074707, "logps/rejected": -286.2569274902344, "loss": 0.0841, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2879364788532257, "rewards/margins": 2.79599666595459, "rewards/rejected": -2.5080604553222656, "step": 7280 }, { "epoch": 0.29, "learning_rate": 4.461399947578434e-06, "logits/chosen": -3.0093865394592285, "logits/rejected": -3.0399465560913086, "logps/chosen": -6.054053783416748, "logps/rejected": -284.29638671875, "loss": 0.103, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2579701244831085, "rewards/margins": 2.7448489665985107, "rewards/rejected": -2.486879587173462, "step": 7290 }, { "epoch": 0.29, "learning_rate": 4.4592336433146e-06, "logits/chosen": -3.008512020111084, "logits/rejected": -3.0379371643066406, "logps/chosen": -0.840154767036438, "logps/rejected": -290.5660095214844, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.30891817808151245, "rewards/margins": 2.8604211807250977, "rewards/rejected": -2.5515029430389404, "step": 7300 }, { "epoch": 0.29, "eval_logits/chosen": -3.0649824142456055, "eval_logits/rejected": -3.08880615234375, "eval_logps/chosen": -0.11585085093975067, "eval_logps/rejected": -280.7962951660156, "eval_loss": 0.06301724165678024, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3154410421848297, "eval_rewards/margins": 2.7632482051849365, "eval_rewards/rejected": -2.4478070735931396, "eval_runtime": 2.5379, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 7300 }, { "epoch": 0.29, "learning_rate": 4.457063519424525e-06, "logits/chosen": -3.0137665271759033, "logits/rejected": -3.0397725105285645, "logps/chosen": -3.1118853092193604, "logps/rejected": -283.88818359375, "loss": 0.0875, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2852526307106018, "rewards/margins": 2.7696566581726074, "rewards/rejected": -2.4844038486480713, "step": 7310 }, { "epoch": 0.29, "learning_rate": 4.4548895801389755e-06, "logits/chosen": -3.0176539421081543, "logits/rejected": -3.044365644454956, "logps/chosen": -5.998173236846924, "logps/rejected": -279.68572998046875, "loss": 0.1129, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25809699296951294, "rewards/margins": 2.697303295135498, "rewards/rejected": -2.439206600189209, "step": 7320 }, { "epoch": 0.29, "learning_rate": 4.452711829696158e-06, "logits/chosen": -2.9796807765960693, "logits/rejected": -3.004234790802002, "logps/chosen": -5.798430919647217, "logps/rejected": -282.01483154296875, "loss": 0.1077, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25889381766319275, "rewards/margins": 2.7237014770507812, "rewards/rejected": -2.4648075103759766, "step": 7330 }, { "epoch": 0.29, "learning_rate": 4.45053027234171e-06, "logits/chosen": -3.0491721630096436, "logits/rejected": -3.0741920471191406, "logps/chosen": -0.20453906059265137, "logps/rejected": -291.5619812011719, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.3159957230091095, "rewards/margins": 2.8726675510406494, "rewards/rejected": -2.5566718578338623, "step": 7340 }, { "epoch": 0.29, "learning_rate": 4.448344912328686e-06, "logits/chosen": -3.019390821456909, "logits/rejected": -3.0465312004089355, "logps/chosen": -0.20245349407196045, "logps/rejected": -291.8934631347656, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3132074475288391, "rewards/margins": 2.878539800643921, "rewards/rejected": -2.5653324127197266, "step": 7350 }, { "epoch": 0.29, "learning_rate": 4.446155753917559e-06, "logits/chosen": -3.009413480758667, "logits/rejected": -3.0348405838012695, "logps/chosen": -0.20815546810626984, "logps/rejected": -290.0909729003906, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3144787549972534, "rewards/margins": 2.8601088523864746, "rewards/rejected": -2.5456299781799316, "step": 7360 }, { "epoch": 0.29, "learning_rate": 4.443962801376206e-06, "logits/chosen": -3.034597873687744, "logits/rejected": -3.0591189861297607, "logps/chosen": -0.21966294944286346, "logps/rejected": -288.8503112792969, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.3164210915565491, "rewards/margins": 2.8474066257476807, "rewards/rejected": -2.5309855937957764, "step": 7370 }, { "epoch": 0.3, "learning_rate": 4.441766058979898e-06, "logits/chosen": -3.0069379806518555, "logits/rejected": -3.033338785171509, "logps/chosen": -8.992049217224121, "logps/rejected": -281.1443786621094, "loss": 0.1294, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23003384470939636, "rewards/margins": 2.685431957244873, "rewards/rejected": -2.4553980827331543, "step": 7380 }, { "epoch": 0.3, "learning_rate": 4.439565531011299e-06, "logits/chosen": -3.008774757385254, "logits/rejected": -3.0360922813415527, "logps/chosen": -2.5592219829559326, "logps/rejected": -290.1780090332031, "loss": 0.0634, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2931903004646301, "rewards/margins": 2.840549945831299, "rewards/rejected": -2.5473597049713135, "step": 7390 }, { "epoch": 0.3, "learning_rate": 4.437361221760449e-06, "logits/chosen": -3.0299830436706543, "logits/rejected": -3.0547571182250977, "logps/chosen": -2.3193018436431885, "logps/rejected": -287.4052429199219, "loss": 0.0632, "rewards/accuracies": 1.0, "rewards/chosen": 0.29224663972854614, "rewards/margins": 2.8144803047180176, "rewards/rejected": -2.522233486175537, "step": 7400 }, { "epoch": 0.3, "eval_logits/chosen": -3.0646960735321045, "eval_logits/rejected": -3.0851635932922363, "eval_logps/chosen": -0.156338170170784, "eval_logps/rejected": -280.6742858886719, "eval_loss": 0.06316900253295898, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150361478328705, "eval_rewards/margins": 2.7616238594055176, "eval_rewards/rejected": -2.446587324142456, "eval_runtime": 2.5414, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 7400 }, { "epoch": 0.3, "learning_rate": 4.4351531355247634e-06, "logits/chosen": -3.0058391094207764, "logits/rejected": -3.028214693069458, "logps/chosen": -6.574278831481934, "logps/rejected": -287.753662109375, "loss": 0.1015, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25180643796920776, "rewards/margins": 2.7758030891418457, "rewards/rejected": -2.523996591567993, "step": 7410 }, { "epoch": 0.3, "learning_rate": 4.432941276609018e-06, "logits/chosen": -3.0295209884643555, "logits/rejected": -3.050924062728882, "logps/chosen": -6.207491874694824, "logps/rejected": -286.0004577636719, "loss": 0.076, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2540804445743561, "rewards/margins": 2.761577606201172, "rewards/rejected": -2.5074970722198486, "step": 7420 }, { "epoch": 0.3, "learning_rate": 4.430725649325346e-06, "logits/chosen": -2.9999659061431885, "logits/rejected": -3.02959942817688, "logps/chosen": -2.5313313007354736, "logps/rejected": -287.2167663574219, "loss": 0.0684, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2899082899093628, "rewards/margins": 2.8087167739868164, "rewards/rejected": -2.518808126449585, "step": 7430 }, { "epoch": 0.3, "learning_rate": 4.428506257993226e-06, "logits/chosen": -3.008122205734253, "logits/rejected": -3.036853313446045, "logps/chosen": -0.2166595160961151, "logps/rejected": -289.02020263671875, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.313212126493454, "rewards/margins": 2.849079132080078, "rewards/rejected": -2.5358669757843018, "step": 7440 }, { "epoch": 0.3, "learning_rate": 4.426283106939474e-06, "logits/chosen": -3.0236709117889404, "logits/rejected": -3.0529556274414062, "logps/chosen": -0.19414207339286804, "logps/rejected": -287.9114074707031, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": 0.3158376216888428, "rewards/margins": 2.8362503051757812, "rewards/rejected": -2.5204126834869385, "step": 7450 }, { "epoch": 0.3, "learning_rate": 4.424056200498237e-06, "logits/chosen": -3.027188777923584, "logits/rejected": -3.051544427871704, "logps/chosen": -5.177372455596924, "logps/rejected": -284.54083251953125, "loss": 0.0942, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2684333324432373, "rewards/margins": 2.758702039718628, "rewards/rejected": -2.4902689456939697, "step": 7460 }, { "epoch": 0.3, "learning_rate": 4.421825543010983e-06, "logits/chosen": -2.985415458679199, "logits/rejected": -3.0179619789123535, "logps/chosen": -0.16816648840904236, "logps/rejected": -288.78680419921875, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148905634880066, "rewards/margins": 2.8475778102874756, "rewards/rejected": -2.532686948776245, "step": 7470 }, { "epoch": 0.3, "learning_rate": 4.419591138826495e-06, "logits/chosen": -3.0144920349121094, "logits/rejected": -3.0450167655944824, "logps/chosen": -1.8679510354995728, "logps/rejected": -285.1022033691406, "loss": 0.0748, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29757195711135864, "rewards/margins": 2.7932040691375732, "rewards/rejected": -2.4956321716308594, "step": 7480 }, { "epoch": 0.3, "learning_rate": 4.417352992300854e-06, "logits/chosen": -3.0302577018737793, "logits/rejected": -3.0572826862335205, "logps/chosen": -3.281696319580078, "logps/rejected": -287.0039978027344, "loss": 0.0869, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28468233346939087, "rewards/margins": 2.8001208305358887, "rewards/rejected": -2.5154383182525635, "step": 7490 }, { "epoch": 0.3, "learning_rate": 4.415111107797445e-06, "logits/chosen": -3.000828266143799, "logits/rejected": -3.0292396545410156, "logps/chosen": -5.772553443908691, "logps/rejected": -283.12432861328125, "loss": 0.1134, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2603955864906311, "rewards/margins": 2.7377724647521973, "rewards/rejected": -2.477376699447632, "step": 7500 }, { "epoch": 0.3, "eval_logits/chosen": -3.0592215061187744, "eval_logits/rejected": -3.0884835720062256, "eval_logps/chosen": -0.1470063179731369, "eval_logps/rejected": -281.2076110839844, "eval_loss": 0.06260915845632553, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31512945890426636, "eval_rewards/margins": 2.767049789428711, "eval_rewards/rejected": -2.4519200325012207, "eval_runtime": 2.5429, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 7500 }, { "epoch": 0.3, "learning_rate": 4.412865489686936e-06, "logits/chosen": -3.0331413745880127, "logits/rejected": -3.0615525245666504, "logps/chosen": -3.855762004852295, "logps/rejected": -279.81207275390625, "loss": 0.1003, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.277926504611969, "rewards/margins": 2.720366954803467, "rewards/rejected": -2.4424405097961426, "step": 7510 }, { "epoch": 0.3, "learning_rate": 4.4106161423472726e-06, "logits/chosen": -3.013096570968628, "logits/rejected": -3.044609546661377, "logps/chosen": -0.188820019364357, "logps/rejected": -288.97137451171875, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.31332170963287354, "rewards/margins": 2.847641706466675, "rewards/rejected": -2.534320116043091, "step": 7520 }, { "epoch": 0.3, "learning_rate": 4.408363070163675e-06, "logits/chosen": -3.0392589569091797, "logits/rejected": -3.069523811340332, "logps/chosen": -0.18037372827529907, "logps/rejected": -287.79400634765625, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": 0.3161497712135315, "rewards/margins": 2.836453676223755, "rewards/rejected": -2.520303964614868, "step": 7530 }, { "epoch": 0.3, "learning_rate": 4.40610627752862e-06, "logits/chosen": -3.029813766479492, "logits/rejected": -3.059356927871704, "logps/chosen": -0.30074065923690796, "logps/rejected": -287.65374755859375, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": 0.31165263056755066, "rewards/margins": 2.8314311504364014, "rewards/rejected": -2.5197787284851074, "step": 7540 }, { "epoch": 0.3, "learning_rate": 4.403845768841842e-06, "logits/chosen": -2.97127103805542, "logits/rejected": -3.005615472793579, "logps/chosen": -0.22427527606487274, "logps/rejected": -286.1227111816406, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": 0.3119141161441803, "rewards/margins": 2.8200955390930176, "rewards/rejected": -2.508181095123291, "step": 7550 }, { "epoch": 0.3, "learning_rate": 4.401581548510319e-06, "logits/chosen": -3.022850751876831, "logits/rejected": -3.0491456985473633, "logps/chosen": -5.553562641143799, "logps/rejected": -281.31207275390625, "loss": 0.1088, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26220327615737915, "rewards/margins": 2.723283052444458, "rewards/rejected": -2.4610798358917236, "step": 7560 }, { "epoch": 0.3, "learning_rate": 4.399313620948262e-06, "logits/chosen": -3.0092358589172363, "logits/rejected": -3.0407872200012207, "logps/chosen": -0.16817566752433777, "logps/rejected": -290.6370544433594, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.318454772233963, "rewards/margins": 2.8644204139709473, "rewards/rejected": -2.5459656715393066, "step": 7570 }, { "epoch": 0.3, "learning_rate": 4.3970419905771145e-06, "logits/chosen": -2.979546070098877, "logits/rejected": -3.0099425315856934, "logps/chosen": -3.764805555343628, "logps/rejected": -288.53369140625, "loss": 0.0901, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27752599120140076, "rewards/margins": 2.8102428913116455, "rewards/rejected": -2.532716989517212, "step": 7580 }, { "epoch": 0.3, "learning_rate": 4.3947666618255335e-06, "logits/chosen": -3.0280356407165527, "logits/rejected": -3.053832530975342, "logps/chosen": -0.23698779940605164, "logps/rejected": -289.9613342285156, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153940439224243, "rewards/margins": 2.861375570297241, "rewards/rejected": -2.5459816455841064, "step": 7590 }, { "epoch": 0.3, "learning_rate": 4.3924876391293915e-06, "logits/chosen": -3.0168216228485107, "logits/rejected": -3.0446274280548096, "logps/chosen": -2.691309690475464, "logps/rejected": -288.4864196777344, "loss": 0.0802, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29166680574417114, "rewards/margins": 2.8173584938049316, "rewards/rejected": -2.525691509246826, "step": 7600 }, { "epoch": 0.3, "eval_logits/chosen": -3.0624380111694336, "eval_logits/rejected": -3.0896475315093994, "eval_logps/chosen": -0.131982684135437, "eval_logps/rejected": -281.33758544921875, "eval_loss": 0.06257272511720657, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3152797222137451, "eval_rewards/margins": 2.7684996128082275, "eval_rewards/rejected": -2.4532198905944824, "eval_runtime": 2.5377, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 7600 }, { "epoch": 0.3, "learning_rate": 4.3902049269317585e-06, "logits/chosen": -3.0246734619140625, "logits/rejected": -3.052237033843994, "logps/chosen": -0.20475634932518005, "logps/rejected": -286.98883056640625, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": 0.3166448473930359, "rewards/margins": 2.8319320678710938, "rewards/rejected": -2.515287160873413, "step": 7610 }, { "epoch": 0.3, "learning_rate": 4.387918529682898e-06, "logits/chosen": -2.981903553009033, "logits/rejected": -3.0143277645111084, "logps/chosen": -10.789712905883789, "logps/rejected": -279.9483947753906, "loss": 0.1605, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.20618680119514465, "rewards/margins": 2.6586263179779053, "rewards/rejected": -2.452439546585083, "step": 7620 }, { "epoch": 0.31, "learning_rate": 4.38562845184026e-06, "logits/chosen": -3.0256474018096924, "logits/rejected": -3.0549850463867188, "logps/chosen": -3.870032548904419, "logps/rejected": -285.9415588378906, "loss": 0.0928, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2814982235431671, "rewards/margins": 2.7824230194091797, "rewards/rejected": -2.500924587249756, "step": 7630 }, { "epoch": 0.31, "learning_rate": 4.383334697868468e-06, "logits/chosen": -3.005939483642578, "logits/rejected": -3.0339207649230957, "logps/chosen": -3.8760924339294434, "logps/rejected": -285.02032470703125, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2783811390399933, "rewards/margins": 2.7746615409851074, "rewards/rejected": -2.4962804317474365, "step": 7640 }, { "epoch": 0.31, "learning_rate": 4.381037272239311e-06, "logits/chosen": -3.0356433391571045, "logits/rejected": -3.0630974769592285, "logps/chosen": -0.22952798008918762, "logps/rejected": -288.86883544921875, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.31489425897598267, "rewards/margins": 2.8486487865448, "rewards/rejected": -2.533754825592041, "step": 7650 }, { "epoch": 0.31, "learning_rate": 4.3787361794317405e-06, "logits/chosen": -2.9965035915374756, "logits/rejected": -3.0263218879699707, "logps/chosen": -0.6003082990646362, "logps/rejected": -289.49493408203125, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": 0.3121141195297241, "rewards/margins": 2.8518824577331543, "rewards/rejected": -2.5397682189941406, "step": 7660 }, { "epoch": 0.31, "learning_rate": 4.3764314239318534e-06, "logits/chosen": -3.01352858543396, "logits/rejected": -3.0446834564208984, "logps/chosen": -3.422391414642334, "logps/rejected": -286.31854248046875, "loss": 0.0862, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28231143951416016, "rewards/margins": 2.7942354679107666, "rewards/rejected": -2.5119240283966064, "step": 7670 }, { "epoch": 0.31, "learning_rate": 4.374123010232888e-06, "logits/chosen": -3.0036120414733887, "logits/rejected": -3.0365962982177734, "logps/chosen": -0.20604737102985382, "logps/rejected": -290.4273986816406, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.31468966603279114, "rewards/margins": 2.8639485836029053, "rewards/rejected": -2.5492587089538574, "step": 7680 }, { "epoch": 0.31, "learning_rate": 4.3718109428352155e-06, "logits/chosen": -3.011878728866577, "logits/rejected": -3.041116714477539, "logps/chosen": -6.53342342376709, "logps/rejected": -283.59124755859375, "loss": 0.1186, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2541262209415436, "rewards/margins": 2.7343811988830566, "rewards/rejected": -2.480255126953125, "step": 7690 }, { "epoch": 0.31, "learning_rate": 4.36949522624633e-06, "logits/chosen": -3.002983808517456, "logits/rejected": -3.0362462997436523, "logps/chosen": -0.23992709815502167, "logps/rejected": -287.99713134765625, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 0.31469619274139404, "rewards/margins": 2.837397575378418, "rewards/rejected": -2.5227015018463135, "step": 7700 }, { "epoch": 0.31, "eval_logits/chosen": -3.0603606700897217, "eval_logits/rejected": -3.0868396759033203, "eval_logps/chosen": -0.14503708481788635, "eval_logps/rejected": -245.5485382080078, "eval_loss": 0.1374204009771347, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151491582393646, "eval_rewards/margins": 2.4104785919189453, "eval_rewards/rejected": -2.095329761505127, "eval_runtime": 2.5395, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 7700 }, { "epoch": 0.31, "learning_rate": 4.36717586498084e-06, "logits/chosen": -3.025954008102417, "logits/rejected": -3.054868698120117, "logps/chosen": -3.77653431892395, "logps/rejected": -282.7917785644531, "loss": 0.0964, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2789495587348938, "rewards/margins": 2.7560153007507324, "rewards/rejected": -2.4770658016204834, "step": 7710 }, { "epoch": 0.31, "learning_rate": 4.364852863560456e-06, "logits/chosen": -3.021491050720215, "logits/rejected": -3.0557496547698975, "logps/chosen": -6.824210166931152, "logps/rejected": -283.78875732421875, "loss": 0.1214, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25147876143455505, "rewards/margins": 2.7334372997283936, "rewards/rejected": -2.4819588661193848, "step": 7720 }, { "epoch": 0.31, "learning_rate": 4.362526226513991e-06, "logits/chosen": -3.0170459747314453, "logits/rejected": -3.0488429069519043, "logps/chosen": -1.9890756607055664, "logps/rejected": -287.61492919921875, "loss": 0.0671, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29687774181365967, "rewards/margins": 2.817232847213745, "rewards/rejected": -2.520354747772217, "step": 7730 }, { "epoch": 0.31, "learning_rate": 4.3601959583773415e-06, "logits/chosen": -3.016472816467285, "logits/rejected": -3.0480704307556152, "logps/chosen": -8.048700332641602, "logps/rejected": -279.92138671875, "loss": 0.1248, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2362148016691208, "rewards/margins": 2.680874824523926, "rewards/rejected": -2.444659948348999, "step": 7740 }, { "epoch": 0.31, "learning_rate": 4.357862063693486e-06, "logits/chosen": -3.019310474395752, "logits/rejected": -3.049877882003784, "logps/chosen": -0.2907094955444336, "logps/rejected": -289.30712890625, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152601718902588, "rewards/margins": 2.85465669631958, "rewards/rejected": -2.539396286010742, "step": 7750 }, { "epoch": 0.31, "learning_rate": 4.355524547012471e-06, "logits/chosen": -3.0062129497528076, "logits/rejected": -3.036728858947754, "logps/chosen": -7.172127723693848, "logps/rejected": -284.37408447265625, "loss": 0.1242, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24120593070983887, "rewards/margins": 2.7347092628479004, "rewards/rejected": -2.4935030937194824, "step": 7760 }, { "epoch": 0.31, "learning_rate": 4.353183412891403e-06, "logits/chosen": -3.0516180992126465, "logits/rejected": -3.08355450630188, "logps/chosen": -0.21175889670848846, "logps/rejected": -289.5698547363281, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3155624568462372, "rewards/margins": 2.858109712600708, "rewards/rejected": -2.5425469875335693, "step": 7770 }, { "epoch": 0.31, "learning_rate": 4.3508386658944455e-06, "logits/chosen": -3.020514726638794, "logits/rejected": -3.0497374534606934, "logps/chosen": -1.6621907949447632, "logps/rejected": -284.1875305175781, "loss": 0.0657, "rewards/accuracies": 1.0, "rewards/chosen": 0.3015104830265045, "rewards/margins": 2.785940170288086, "rewards/rejected": -2.4844295978546143, "step": 7780 }, { "epoch": 0.31, "learning_rate": 4.348490310592801e-06, "logits/chosen": -3.0085697174072266, "logits/rejected": -3.0388941764831543, "logps/chosen": -2.468095064163208, "logps/rejected": -285.6866455078125, "loss": 0.0789, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2926560342311859, "rewards/margins": 2.792243719100952, "rewards/rejected": -2.4995875358581543, "step": 7790 }, { "epoch": 0.31, "learning_rate": 4.346138351564711e-06, "logits/chosen": -3.02864146232605, "logits/rejected": -3.0590591430664062, "logps/chosen": -9.392985343933105, "logps/rejected": -278.06854248046875, "loss": 0.1363, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2237921506166458, "rewards/margins": 2.65079927444458, "rewards/rejected": -2.4270074367523193, "step": 7800 }, { "epoch": 0.31, "eval_logits/chosen": -3.06512451171875, "eval_logits/rejected": -3.091974973678589, "eval_logps/chosen": -0.16365018486976624, "eval_logps/rejected": -281.36737060546875, "eval_loss": 0.06263256072998047, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31496304273605347, "eval_rewards/margins": 2.7684810161590576, "eval_rewards/rejected": -2.4535179138183594, "eval_runtime": 2.5434, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 7800 }, { "epoch": 0.31, "learning_rate": 4.343782793395435e-06, "logits/chosen": -3.0194382667541504, "logits/rejected": -3.047722578048706, "logps/chosen": -5.847398281097412, "logps/rejected": -283.4359130859375, "loss": 0.0984, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.25661906599998474, "rewards/margins": 2.7406792640686035, "rewards/rejected": -2.484060525894165, "step": 7810 }, { "epoch": 0.31, "learning_rate": 4.341423640677259e-06, "logits/chosen": -2.988325357437134, "logits/rejected": -3.0206289291381836, "logps/chosen": -0.21904389560222626, "logps/rejected": -290.58587646484375, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3147842288017273, "rewards/margins": 2.866633892059326, "rewards/rejected": -2.551849842071533, "step": 7820 }, { "epoch": 0.31, "learning_rate": 4.339060898009469e-06, "logits/chosen": -3.039980888366699, "logits/rejected": -3.0689263343811035, "logps/chosen": -0.2703496217727661, "logps/rejected": -289.1882019042969, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.31429779529571533, "rewards/margins": 2.8510375022888184, "rewards/rejected": -2.5367398262023926, "step": 7830 }, { "epoch": 0.31, "learning_rate": 4.336694569998354e-06, "logits/chosen": -3.0164599418640137, "logits/rejected": -3.0442585945129395, "logps/chosen": -4.061544418334961, "logps/rejected": -285.739990234375, "loss": 0.0928, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27728351950645447, "rewards/margins": 2.7808287143707275, "rewards/rejected": -2.503545045852661, "step": 7840 }, { "epoch": 0.31, "learning_rate": 4.334324661257191e-06, "logits/chosen": -3.0215904712677, "logits/rejected": -3.0533881187438965, "logps/chosen": -3.3063735961914062, "logps/rejected": -286.475341796875, "loss": 0.0869, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28214046359062195, "rewards/margins": 2.798236131668091, "rewards/rejected": -2.5160958766937256, "step": 7850 }, { "epoch": 0.31, "learning_rate": 4.33195117640624e-06, "logits/chosen": -2.9932701587677, "logits/rejected": -3.0215182304382324, "logps/chosen": -5.774084091186523, "logps/rejected": -279.79559326171875, "loss": 0.1143, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2591385841369629, "rewards/margins": 2.70304536819458, "rewards/rejected": -2.443906784057617, "step": 7860 }, { "epoch": 0.31, "learning_rate": 4.329574120072728e-06, "logits/chosen": -3.0040152072906494, "logits/rejected": -3.0362064838409424, "logps/chosen": -6.090770721435547, "logps/rejected": -283.0311584472656, "loss": 0.1154, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25501593947410583, "rewards/margins": 2.7290446758270264, "rewards/rejected": -2.4740288257598877, "step": 7870 }, { "epoch": 0.32, "learning_rate": 4.327193496890852e-06, "logits/chosen": -2.9992194175720215, "logits/rejected": -3.03322696685791, "logps/chosen": -0.31556209921836853, "logps/rejected": -284.04412841796875, "loss": 0.0626, "rewards/accuracies": 1.0, "rewards/chosen": 0.3116791844367981, "rewards/margins": 2.7994117736816406, "rewards/rejected": -2.487732410430908, "step": 7880 }, { "epoch": 0.32, "learning_rate": 4.3248093115017544e-06, "logits/chosen": -2.998715877532959, "logits/rejected": -3.03102970123291, "logps/chosen": -0.29486995935440063, "logps/rejected": -288.87237548828125, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.3117997348308563, "rewards/margins": 2.851933002471924, "rewards/rejected": -2.540133237838745, "step": 7890 }, { "epoch": 0.32, "learning_rate": 4.322421568553529e-06, "logits/chosen": -3.008972644805908, "logits/rejected": -3.039386749267578, "logps/chosen": -7.3018012046813965, "logps/rejected": -282.7086181640625, "loss": 0.1268, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2432614266872406, "rewards/margins": 2.7159571647644043, "rewards/rejected": -2.4726955890655518, "step": 7900 }, { "epoch": 0.32, "eval_logits/chosen": -3.0626156330108643, "eval_logits/rejected": -3.091022253036499, "eval_logps/chosen": -0.1512150913476944, "eval_logps/rejected": -281.7812805175781, "eval_loss": 0.062134891748428345, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31508737802505493, "eval_rewards/margins": 2.772744655609131, "eval_rewards/rejected": -2.4576573371887207, "eval_runtime": 2.5345, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 7900 }, { "epoch": 0.32, "learning_rate": 4.320030272701203e-06, "logits/chosen": -3.018584728240967, "logits/rejected": -3.0474307537078857, "logps/chosen": -5.976608753204346, "logps/rejected": -282.3760681152344, "loss": 0.1146, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25598612427711487, "rewards/margins": 2.723888397216797, "rewards/rejected": -2.467902183532715, "step": 7910 }, { "epoch": 0.32, "learning_rate": 4.31763542860673e-06, "logits/chosen": -3.0066521167755127, "logits/rejected": -3.0395219326019287, "logps/chosen": -0.19280868768692017, "logps/rejected": -289.30682373046875, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.31496530771255493, "rewards/margins": 2.8512442111968994, "rewards/rejected": -2.53627872467041, "step": 7920 }, { "epoch": 0.32, "learning_rate": 4.3152370409389795e-06, "logits/chosen": -3.0178937911987305, "logits/rejected": -3.0498130321502686, "logps/chosen": -6.027385234832764, "logps/rejected": -280.75469970703125, "loss": 0.1154, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2539643943309784, "rewards/margins": 2.713449716567993, "rewards/rejected": -2.4594850540161133, "step": 7930 }, { "epoch": 0.32, "learning_rate": 4.3128351143737335e-06, "logits/chosen": -3.0345659255981445, "logits/rejected": -3.066953182220459, "logps/chosen": -1.897690773010254, "logps/rejected": -287.07525634765625, "loss": 0.0633, "rewards/accuracies": 1.0, "rewards/chosen": 0.2978537976741791, "rewards/margins": 2.81268310546875, "rewards/rejected": -2.514829635620117, "step": 7940 }, { "epoch": 0.32, "learning_rate": 4.3104296535936695e-06, "logits/chosen": -3.0189313888549805, "logits/rejected": -3.0499672889709473, "logps/chosen": -4.248266220092773, "logps/rejected": -281.99652099609375, "loss": 0.096, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27232927083969116, "rewards/margins": 2.7414608001708984, "rewards/rejected": -2.4691314697265625, "step": 7950 }, { "epoch": 0.32, "learning_rate": 4.308020663288356e-06, "logits/chosen": -3.0152790546417236, "logits/rejected": -3.0444602966308594, "logps/chosen": -3.876478910446167, "logps/rejected": -281.259033203125, "loss": 0.0957, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28065091371536255, "rewards/margins": 2.738988161087036, "rewards/rejected": -2.4583373069763184, "step": 7960 }, { "epoch": 0.32, "learning_rate": 4.305608148154242e-06, "logits/chosen": -3.0113565921783447, "logits/rejected": -3.043264627456665, "logps/chosen": -5.503933906555176, "logps/rejected": -282.4359130859375, "loss": 0.1104, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2617030143737793, "rewards/margins": 2.7308366298675537, "rewards/rejected": -2.4691338539123535, "step": 7970 }, { "epoch": 0.32, "learning_rate": 4.303192112894652e-06, "logits/chosen": -3.0246214866638184, "logits/rejected": -3.054961919784546, "logps/chosen": -3.2152256965637207, "logps/rejected": -285.565673828125, "loss": 0.087, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2847989499568939, "rewards/margins": 2.782620906829834, "rewards/rejected": -2.4978222846984863, "step": 7980 }, { "epoch": 0.32, "learning_rate": 4.3007725622197675e-06, "logits/chosen": -3.0002055168151855, "logits/rejected": -3.031437397003174, "logps/chosen": -3.973933696746826, "logps/rejected": -284.6603698730469, "loss": 0.0931, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27395275235176086, "rewards/margins": 2.7721621990203857, "rewards/rejected": -2.4982094764709473, "step": 7990 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -2.9959068298339844, "logits/rejected": -3.0277419090270996, "logps/chosen": -0.2885099947452545, "logps/rejected": -285.79852294921875, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": 0.3106728196144104, "rewards/margins": 2.8164658546447754, "rewards/rejected": -2.505793333053589, "step": 8000 }, { "epoch": 0.32, "eval_logits/chosen": -3.062608242034912, "eval_logits/rejected": -3.0912280082702637, "eval_logps/chosen": -0.13419964909553528, "eval_logps/rejected": -282.2266845703125, "eval_loss": 0.06181494519114494, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31525754928588867, "eval_rewards/margins": 2.7773690223693848, "eval_rewards/rejected": -2.462111234664917, "eval_runtime": 2.535, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 8000 }, { "epoch": 0.32, "learning_rate": 4.295922933499116e-06, "logits/chosen": -3.029618978500366, "logits/rejected": -3.0614144802093506, "logps/chosen": -0.6702224612236023, "logps/rejected": -287.09368896484375, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": 0.3110712170600891, "rewards/margins": 2.8282103538513184, "rewards/rejected": -2.517139196395874, "step": 8010 }, { "epoch": 0.32, "learning_rate": 4.293492864907947e-06, "logits/chosen": -2.999912977218628, "logits/rejected": -3.0300452709198, "logps/chosen": -7.332828521728516, "logps/rejected": -281.9374694824219, "loss": 0.1273, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24365773797035217, "rewards/margins": 2.7099692821502686, "rewards/rejected": -2.4663116931915283, "step": 8020 }, { "epoch": 0.32, "learning_rate": 4.291059299810665e-06, "logits/chosen": -3.0051019191741943, "logits/rejected": -3.038139820098877, "logps/chosen": -2.5250954627990723, "logps/rejected": -287.80810546875, "loss": 0.0637, "rewards/accuracies": 1.0, "rewards/chosen": 0.28897562623023987, "rewards/margins": 2.8189783096313477, "rewards/rejected": -2.5300028324127197, "step": 8030 }, { "epoch": 0.32, "learning_rate": 4.28862224295163e-06, "logits/chosen": -3.0135691165924072, "logits/rejected": -3.0424771308898926, "logps/chosen": -2.4876837730407715, "logps/rejected": -286.40325927734375, "loss": 0.0766, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2931550145149231, "rewards/margins": 2.801525592803955, "rewards/rejected": -2.5083706378936768, "step": 8040 }, { "epoch": 0.32, "learning_rate": 4.286181699082008e-06, "logits/chosen": -3.0263938903808594, "logits/rejected": -3.056776523590088, "logps/chosen": -0.20842976868152618, "logps/rejected": -292.4732360839844, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.31678181886672974, "rewards/margins": 2.8835558891296387, "rewards/rejected": -2.5667741298675537, "step": 8050 }, { "epoch": 0.32, "learning_rate": 4.283737672959766e-06, "logits/chosen": -2.9939537048339844, "logits/rejected": -3.0235705375671387, "logps/chosen": -8.902975082397461, "logps/rejected": -280.50982666015625, "loss": 0.1304, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2265726774930954, "rewards/margins": 2.6785354614257812, "rewards/rejected": -2.45196270942688, "step": 8060 }, { "epoch": 0.32, "learning_rate": 4.281290169349656e-06, "logits/chosen": -3.0104727745056152, "logits/rejected": -3.0427212715148926, "logps/chosen": -2.9770047664642334, "logps/rejected": -288.72802734375, "loss": 0.0641, "rewards/accuracies": 1.0, "rewards/chosen": 0.285952091217041, "rewards/margins": 2.8183951377868652, "rewards/rejected": -2.532442808151245, "step": 8070 }, { "epoch": 0.32, "learning_rate": 4.278839193023214e-06, "logits/chosen": -3.0286638736724854, "logits/rejected": -3.0595905780792236, "logps/chosen": -0.18124507367610931, "logps/rejected": -288.02667236328125, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": 0.31229302287101746, "rewards/margins": 2.8395562171936035, "rewards/rejected": -2.527263641357422, "step": 8080 }, { "epoch": 0.32, "learning_rate": 4.27638474875874e-06, "logits/chosen": -3.0290465354919434, "logits/rejected": -3.0561256408691406, "logps/chosen": -13.289111137390137, "logps/rejected": -273.1376953125, "loss": 0.186, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1856473684310913, "rewards/margins": 2.561218023300171, "rewards/rejected": -2.375570774078369, "step": 8090 }, { "epoch": 0.32, "learning_rate": 4.273926841341303e-06, "logits/chosen": -3.0323429107666016, "logits/rejected": -3.0592849254608154, "logps/chosen": -10.501602172851562, "logps/rejected": -273.84197998046875, "loss": 0.1578, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21087178587913513, "rewards/margins": 2.595773220062256, "rewards/rejected": -2.384901285171509, "step": 8100 }, { "epoch": 0.32, "eval_logits/chosen": -3.0658538341522217, "eval_logits/rejected": -3.0946455001831055, "eval_logps/chosen": -0.1229260191321373, "eval_logps/rejected": -281.64642333984375, "eval_loss": 0.06207127124071121, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31537026166915894, "eval_rewards/margins": 2.7716784477233887, "eval_rewards/rejected": -2.456308364868164, "eval_runtime": 2.5344, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 8100 }, { "epoch": 0.32, "learning_rate": 4.271465475562716e-06, "logits/chosen": -3.0178894996643066, "logits/rejected": -3.0496115684509277, "logps/chosen": -0.2416851967573166, "logps/rejected": -288.17828369140625, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.3113313615322113, "rewards/margins": 2.8401663303375244, "rewards/rejected": -2.528834819793701, "step": 8110 }, { "epoch": 0.32, "learning_rate": 4.269000656221539e-06, "logits/chosen": -3.0066001415252686, "logits/rejected": -3.0403316020965576, "logps/chosen": -1.4313130378723145, "logps/rejected": -285.1078186035156, "loss": 0.0692, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3020394742488861, "rewards/margins": 2.799365520477295, "rewards/rejected": -2.497325897216797, "step": 8120 }, { "epoch": 0.33, "learning_rate": 4.266532388123063e-06, "logits/chosen": -3.018533945083618, "logits/rejected": -3.051710605621338, "logps/chosen": -0.22839811444282532, "logps/rejected": -286.2483825683594, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": 0.31522053480148315, "rewards/margins": 2.819514513015747, "rewards/rejected": -2.504293918609619, "step": 8130 }, { "epoch": 0.33, "learning_rate": 4.264060676079302e-06, "logits/chosen": -3.0283305644989014, "logits/rejected": -3.0612168312072754, "logps/chosen": -1.2813503742218018, "logps/rejected": -284.17510986328125, "loss": 0.0684, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30288246273994446, "rewards/margins": 2.7916042804718018, "rewards/rejected": -2.4887218475341797, "step": 8140 }, { "epoch": 0.33, "learning_rate": 4.261585524908987e-06, "logits/chosen": -3.003441333770752, "logits/rejected": -3.0371720790863037, "logps/chosen": -0.24574688076972961, "logps/rejected": -290.7889709472656, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.31507039070129395, "rewards/margins": 2.8680758476257324, "rewards/rejected": -2.5530054569244385, "step": 8150 }, { "epoch": 0.33, "learning_rate": 4.259106939437551e-06, "logits/chosen": -3.02311635017395, "logits/rejected": -3.052551746368408, "logps/chosen": -0.31633976101875305, "logps/rejected": -288.75982666015625, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.31197649240493774, "rewards/margins": 2.8456180095672607, "rewards/rejected": -2.5336413383483887, "step": 8160 }, { "epoch": 0.33, "learning_rate": 4.256624924497124e-06, "logits/chosen": -3.0265748500823975, "logits/rejected": -3.056164503097534, "logps/chosen": -4.3442463874816895, "logps/rejected": -282.9743347167969, "loss": 0.0997, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2749686539173126, "rewards/margins": 2.746640682220459, "rewards/rejected": -2.471672296524048, "step": 8170 }, { "epoch": 0.33, "learning_rate": 4.254139484926519e-06, "logits/chosen": -3.0080761909484863, "logits/rejected": -3.0358948707580566, "logps/chosen": -11.249069213867188, "logps/rejected": -280.0832824707031, "loss": 0.1289, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.20630034804344177, "rewards/margins": 2.6489994525909424, "rewards/rejected": -2.4426989555358887, "step": 8180 }, { "epoch": 0.33, "learning_rate": 4.25165062557123e-06, "logits/chosen": -3.0086286067962646, "logits/rejected": -3.037066698074341, "logps/chosen": -4.187119483947754, "logps/rejected": -285.1722106933594, "loss": 0.0932, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27426761388778687, "rewards/margins": 2.7725555896759033, "rewards/rejected": -2.4982876777648926, "step": 8190 }, { "epoch": 0.33, "learning_rate": 4.249158351283414e-06, "logits/chosen": -3.053661584854126, "logits/rejected": -3.0794758796691895, "logps/chosen": -7.117027282714844, "logps/rejected": -278.56756591796875, "loss": 0.1307, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24686793982982635, "rewards/margins": 2.677532911300659, "rewards/rejected": -2.4306652545928955, "step": 8200 }, { "epoch": 0.33, "eval_logits/chosen": -3.069944143295288, "eval_logits/rejected": -3.0971267223358154, "eval_logps/chosen": -0.13105586171150208, "eval_logps/rejected": -281.8946228027344, "eval_loss": 0.06201434135437012, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3152889609336853, "eval_rewards/margins": 2.7740795612335205, "eval_rewards/rejected": -2.4587905406951904, "eval_runtime": 2.5363, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 8200 }, { "epoch": 0.33, "learning_rate": 4.246662666921888e-06, "logits/chosen": -3.006622076034546, "logits/rejected": -3.0365962982177734, "logps/chosen": -0.20696131885051727, "logps/rejected": -291.5624694824219, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3176666498184204, "rewards/margins": 2.874582529067993, "rewards/rejected": -2.5569159984588623, "step": 8210 }, { "epoch": 0.33, "learning_rate": 4.244163577352116e-06, "logits/chosen": -3.039670705795288, "logits/rejected": -3.068167209625244, "logps/chosen": -0.16925418376922607, "logps/rejected": -287.41375732421875, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": 0.3136782944202423, "rewards/margins": 2.8341116905212402, "rewards/rejected": -2.520433187484741, "step": 8220 }, { "epoch": 0.33, "learning_rate": 4.241661087446202e-06, "logits/chosen": -3.031627655029297, "logits/rejected": -3.057117462158203, "logps/chosen": -2.4648070335388184, "logps/rejected": -288.20452880859375, "loss": 0.0743, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.293587327003479, "rewards/margins": 2.8185977935791016, "rewards/rejected": -2.525010585784912, "step": 8230 }, { "epoch": 0.33, "learning_rate": 4.239155202082878e-06, "logits/chosen": -3.0055079460144043, "logits/rejected": -3.0346875190734863, "logps/chosen": -1.760890245437622, "logps/rejected": -286.859375, "loss": 0.0741, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30106833577156067, "rewards/margins": 2.812706470489502, "rewards/rejected": -2.5116379261016846, "step": 8240 }, { "epoch": 0.33, "learning_rate": 4.236645926147493e-06, "logits/chosen": -3.0279932022094727, "logits/rejected": -3.055891990661621, "logps/chosen": -0.14484158158302307, "logps/rejected": -289.632080078125, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31583303213119507, "rewards/margins": 2.8568034172058105, "rewards/rejected": -2.5409703254699707, "step": 8250 }, { "epoch": 0.33, "learning_rate": 4.234133264532012e-06, "logits/chosen": -3.034210681915283, "logits/rejected": -3.0631816387176514, "logps/chosen": -0.22756648063659668, "logps/rejected": -287.9378967285156, "loss": 0.0608, "rewards/accuracies": 1.0, "rewards/chosen": 0.3151630461215973, "rewards/margins": 2.839789867401123, "rewards/rejected": -2.5246264934539795, "step": 8260 }, { "epoch": 0.33, "learning_rate": 4.231617222134997e-06, "logits/chosen": -3.008037567138672, "logits/rejected": -3.0387911796569824, "logps/chosen": -0.21012946963310242, "logps/rejected": -289.8028869628906, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3172453045845032, "rewards/margins": 2.8590025901794434, "rewards/rejected": -2.541757106781006, "step": 8270 }, { "epoch": 0.33, "learning_rate": 4.229097803861601e-06, "logits/chosen": -3.014268159866333, "logits/rejected": -3.0486488342285156, "logps/chosen": -0.24079516530036926, "logps/rejected": -289.5904235839844, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.31582948565483093, "rewards/margins": 2.852527141571045, "rewards/rejected": -2.53669810295105, "step": 8280 }, { "epoch": 0.33, "learning_rate": 4.226575014623557e-06, "logits/chosen": -3.036571502685547, "logits/rejected": -3.0650463104248047, "logps/chosen": -3.6106762886047363, "logps/rejected": -282.4620361328125, "loss": 0.0929, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27843326330184937, "rewards/margins": 2.752340793609619, "rewards/rejected": -2.473907470703125, "step": 8290 }, { "epoch": 0.33, "learning_rate": 4.224048859339175e-06, "logits/chosen": -2.9999027252197266, "logits/rejected": -3.0318379402160645, "logps/chosen": -4.017712116241455, "logps/rejected": -286.9378662109375, "loss": 0.0743, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2749783396720886, "rewards/margins": 2.7945733070373535, "rewards/rejected": -2.519595146179199, "step": 8300 }, { "epoch": 0.33, "eval_logits/chosen": -3.069061756134033, "eval_logits/rejected": -3.0962021350860596, "eval_logps/chosen": -0.14297524094581604, "eval_logps/rejected": -282.95477294921875, "eval_loss": 0.0612737312912941, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151697814464569, "eval_rewards/margins": 2.7845618724823, "eval_rewards/rejected": -2.4693920612335205, "eval_runtime": 2.5378, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 8300 }, { "epoch": 0.33, "learning_rate": 4.221519342933321e-06, "logits/chosen": -3.04472279548645, "logits/rejected": -3.074188232421875, "logps/chosen": -0.19305722415447235, "logps/rejected": -287.210205078125, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148444592952728, "rewards/margins": 2.828742504119873, "rewards/rejected": -2.5138983726501465, "step": 8310 }, { "epoch": 0.33, "learning_rate": 4.218986470337419e-06, "logits/chosen": -3.0246658325195312, "logits/rejected": -3.0526344776153564, "logps/chosen": -2.3388516902923584, "logps/rejected": -289.8243713378906, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": 0.2936604619026184, "rewards/margins": 2.8356592655181885, "rewards/rejected": -2.541999101638794, "step": 8320 }, { "epoch": 0.33, "learning_rate": 4.216450246489432e-06, "logits/chosen": -3.0406782627105713, "logits/rejected": -3.0643138885498047, "logps/chosen": -22.12420654296875, "logps/rejected": -273.7965087890625, "loss": 0.2014, "rewards/accuracies": 0.9375, "rewards/chosen": 0.09603555500507355, "rewards/margins": 2.479140281677246, "rewards/rejected": -2.3831043243408203, "step": 8330 }, { "epoch": 0.33, "learning_rate": 4.2139106763338595e-06, "logits/chosen": -3.0343053340911865, "logits/rejected": -3.061241626739502, "logps/chosen": -6.272188186645508, "logps/rejected": -283.3048400878906, "loss": 0.1047, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2519668936729431, "rewards/margins": 2.7365949153900146, "rewards/rejected": -2.484628200531006, "step": 8340 }, { "epoch": 0.33, "learning_rate": 4.211367764821722e-06, "logits/chosen": -3.023876667022705, "logits/rejected": -3.052757501602173, "logps/chosen": -5.836031436920166, "logps/rejected": -279.3314514160156, "loss": 0.1098, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2584336996078491, "rewards/margins": 2.6948065757751465, "rewards/rejected": -2.436372995376587, "step": 8350 }, { "epoch": 0.33, "learning_rate": 4.208821516910557e-06, "logits/chosen": -3.0266098976135254, "logits/rejected": -3.0531959533691406, "logps/chosen": -3.8821704387664795, "logps/rejected": -286.99798583984375, "loss": 0.0918, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27804794907569885, "rewards/margins": 2.790902853012085, "rewards/rejected": -2.512855052947998, "step": 8360 }, { "epoch": 0.33, "learning_rate": 4.206271937564404e-06, "logits/chosen": -3.0540504455566406, "logits/rejected": -3.0814120769500732, "logps/chosen": -3.0819811820983887, "logps/rejected": -284.63787841796875, "loss": 0.0846, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28758636116981506, "rewards/margins": 2.7813191413879395, "rewards/rejected": -2.4937326908111572, "step": 8370 }, { "epoch": 0.34, "learning_rate": 4.2037190317538e-06, "logits/chosen": -3.043522357940674, "logits/rejected": -3.072047710418701, "logps/chosen": -0.8609404563903809, "logps/rejected": -284.7071228027344, "loss": 0.0608, "rewards/accuracies": 1.0, "rewards/chosen": 0.30952081084251404, "rewards/margins": 2.8023250102996826, "rewards/rejected": -2.4928042888641357, "step": 8380 }, { "epoch": 0.34, "learning_rate": 4.201162804455764e-06, "logits/chosen": -3.024015188217163, "logits/rejected": -3.053104877471924, "logps/chosen": -0.19978323578834534, "logps/rejected": -289.64373779296875, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.31415772438049316, "rewards/margins": 2.856935501098633, "rewards/rejected": -2.5427772998809814, "step": 8390 }, { "epoch": 0.34, "learning_rate": 4.198603260653792e-06, "logits/chosen": -3.026977062225342, "logits/rejected": -3.0551483631134033, "logps/chosen": -3.2890167236328125, "logps/rejected": -283.4964904785156, "loss": 0.0892, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.284371554851532, "rewards/margins": 2.765049457550049, "rewards/rejected": -2.480678081512451, "step": 8400 }, { "epoch": 0.34, "eval_logits/chosen": -3.0686428546905518, "eval_logits/rejected": -3.0968594551086426, "eval_logps/chosen": -0.14062325656414032, "eval_logps/rejected": -282.71466064453125, "eval_loss": 0.06142845004796982, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151932954788208, "eval_rewards/margins": 2.782183885574341, "eval_rewards/rejected": -2.4669904708862305, "eval_runtime": 2.5346, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 8400 }, { "epoch": 0.34, "learning_rate": 4.196040405337846e-06, "logits/chosen": -3.003312587738037, "logits/rejected": -3.0336151123046875, "logps/chosen": -4.4034318923950195, "logps/rejected": -283.4423522949219, "loss": 0.0759, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2697734236717224, "rewards/margins": 2.7557997703552246, "rewards/rejected": -2.4860262870788574, "step": 8410 }, { "epoch": 0.34, "learning_rate": 4.193474243504343e-06, "logits/chosen": -3.0250229835510254, "logits/rejected": -3.04999041557312, "logps/chosen": -9.058464050292969, "logps/rejected": -281.903564453125, "loss": 0.1289, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22796078026294708, "rewards/margins": 2.690561294555664, "rewards/rejected": -2.4626004695892334, "step": 8420 }, { "epoch": 0.34, "learning_rate": 4.190904780156149e-06, "logits/chosen": -3.0260753631591797, "logits/rejected": -3.0558228492736816, "logps/chosen": -5.1435699462890625, "logps/rejected": -285.4373474121094, "loss": 0.0726, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26135390996932983, "rewards/margins": 2.7719931602478027, "rewards/rejected": -2.510639190673828, "step": 8430 }, { "epoch": 0.34, "learning_rate": 4.188332020302561e-06, "logits/chosen": -3.0041818618774414, "logits/rejected": -3.0359199047088623, "logps/chosen": -7.516046047210693, "logps/rejected": -280.8125, "loss": 0.1296, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2426423579454422, "rewards/margins": 2.6963114738464355, "rewards/rejected": -2.4536690711975098, "step": 8440 }, { "epoch": 0.34, "learning_rate": 4.185755968959308e-06, "logits/chosen": -3.0169591903686523, "logits/rejected": -3.0513358116149902, "logps/chosen": -0.20896100997924805, "logps/rejected": -288.6285705566406, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.31383928656578064, "rewards/margins": 2.847358226776123, "rewards/rejected": -2.5335185527801514, "step": 8450 }, { "epoch": 0.34, "learning_rate": 4.1831766311485345e-06, "logits/chosen": -3.015139102935791, "logits/rejected": -3.046295642852783, "logps/chosen": -3.685558795928955, "logps/rejected": -282.3558654785156, "loss": 0.0968, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.278008371591568, "rewards/margins": 2.748293161392212, "rewards/rejected": -2.470284938812256, "step": 8460 }, { "epoch": 0.34, "learning_rate": 4.180594011898791e-06, "logits/chosen": -3.027128219604492, "logits/rejected": -3.06001615524292, "logps/chosen": -0.1608521044254303, "logps/rejected": -290.02215576171875, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.317554235458374, "rewards/margins": 2.861114025115967, "rewards/rejected": -2.5435595512390137, "step": 8470 }, { "epoch": 0.34, "learning_rate": 4.178008116245024e-06, "logits/chosen": -3.0358595848083496, "logits/rejected": -3.0648727416992188, "logps/chosen": -2.7931177616119385, "logps/rejected": -286.7034912109375, "loss": 0.0824, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2896449565887451, "rewards/margins": 2.7998805046081543, "rewards/rejected": -2.5102360248565674, "step": 8480 }, { "epoch": 0.34, "learning_rate": 4.175418949228571e-06, "logits/chosen": -3.009207248687744, "logits/rejected": -3.041529893875122, "logps/chosen": -2.378652811050415, "logps/rejected": -286.110595703125, "loss": 0.078, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29128125309944153, "rewards/margins": 2.798572301864624, "rewards/rejected": -2.507290840148926, "step": 8490 }, { "epoch": 0.34, "learning_rate": 4.172826515897146e-06, "logits/chosen": -3.0335817337036133, "logits/rejected": -3.06345534324646, "logps/chosen": -3.7546768188476562, "logps/rejected": -287.0608215332031, "loss": 0.091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2775704562664032, "rewards/margins": 2.7967119216918945, "rewards/rejected": -2.519141435623169, "step": 8500 }, { "epoch": 0.34, "eval_logits/chosen": -3.0734243392944336, "eval_logits/rejected": -3.105238914489746, "eval_logps/chosen": -0.14898176491260529, "eval_logps/rejected": -281.4597473144531, "eval_loss": 0.06253378838300705, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151097297668457, "eval_rewards/margins": 2.7695515155792236, "eval_rewards/rejected": -2.454441785812378, "eval_runtime": 2.5363, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 8500 }, { "epoch": 0.34, "learning_rate": 4.17023082130483e-06, "logits/chosen": -3.0244507789611816, "logits/rejected": -3.055126190185547, "logps/chosen": -1.6601946353912354, "logps/rejected": -286.95172119140625, "loss": 0.0692, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3001658022403717, "rewards/margins": 2.8191635608673096, "rewards/rejected": -2.5189976692199707, "step": 8510 }, { "epoch": 0.34, "learning_rate": 4.167631870512061e-06, "logits/chosen": -3.034938335418701, "logits/rejected": -3.066685914993286, "logps/chosen": -3.8190503120422363, "logps/rejected": -288.28900146484375, "loss": 0.091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2791939675807953, "rewards/margins": 2.807143449783325, "rewards/rejected": -2.527949094772339, "step": 8520 }, { "epoch": 0.34, "learning_rate": 4.16502966858563e-06, "logits/chosen": -3.0126419067382812, "logits/rejected": -3.042632579803467, "logps/chosen": -9.990955352783203, "logps/rejected": -279.9924621582031, "loss": 0.1536, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21649110317230225, "rewards/margins": 2.6640236377716064, "rewards/rejected": -2.4475324153900146, "step": 8530 }, { "epoch": 0.34, "learning_rate": 4.162424220598659e-06, "logits/chosen": -3.0343945026397705, "logits/rejected": -3.066210985183716, "logps/chosen": -0.24550583958625793, "logps/rejected": -289.763427734375, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3130923807621002, "rewards/margins": 2.859348773956299, "rewards/rejected": -2.5462565422058105, "step": 8540 }, { "epoch": 0.34, "learning_rate": 4.159815531630604e-06, "logits/chosen": -3.039658308029175, "logits/rejected": -3.0698723793029785, "logps/chosen": -0.7834663987159729, "logps/rejected": -283.64154052734375, "loss": 0.072, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30983856320381165, "rewards/margins": 2.7896831035614014, "rewards/rejected": -2.479844570159912, "step": 8550 }, { "epoch": 0.34, "learning_rate": 4.1572036067672386e-06, "logits/chosen": -3.0242760181427, "logits/rejected": -3.05612850189209, "logps/chosen": -3.819730281829834, "logps/rejected": -287.47808837890625, "loss": 0.0913, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27996426820755005, "rewards/margins": 2.8031201362609863, "rewards/rejected": -2.523155689239502, "step": 8560 }, { "epoch": 0.34, "learning_rate": 4.154588451100642e-06, "logits/chosen": -3.0207467079162598, "logits/rejected": -3.053049325942993, "logps/chosen": -0.23569254577159882, "logps/rejected": -286.39691162109375, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": 0.31670528650283813, "rewards/margins": 2.8243353366851807, "rewards/rejected": -2.5076301097869873, "step": 8570 }, { "epoch": 0.34, "learning_rate": 4.1519700697291945e-06, "logits/chosen": -3.005507230758667, "logits/rejected": -3.0352680683135986, "logps/chosen": -3.137134313583374, "logps/rejected": -287.4003601074219, "loss": 0.0806, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2861426770687103, "rewards/margins": 2.8055307865142822, "rewards/rejected": -2.51938796043396, "step": 8580 }, { "epoch": 0.34, "learning_rate": 4.149348467757566e-06, "logits/chosen": -3.0106050968170166, "logits/rejected": -3.0355255603790283, "logps/chosen": -9.393930435180664, "logps/rejected": -283.07635498046875, "loss": 0.1117, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.22419460117816925, "rewards/margins": 2.699465274810791, "rewards/rejected": -2.4752705097198486, "step": 8590 }, { "epoch": 0.34, "learning_rate": 4.146723650296701e-06, "logits/chosen": -3.002495050430298, "logits/rejected": -3.0315916538238525, "logps/chosen": -6.905428886413574, "logps/rejected": -281.4727478027344, "loss": 0.1245, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24747169017791748, "rewards/margins": 2.7074742317199707, "rewards/rejected": -2.4600024223327637, "step": 8600 }, { "epoch": 0.34, "eval_logits/chosen": -3.074193000793457, "eval_logits/rejected": -3.103393077850342, "eval_logps/chosen": -0.12731966376304626, "eval_logps/rejected": -281.7121276855469, "eval_loss": 0.061776746064424515, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31532636284828186, "eval_rewards/margins": 2.772292137145996, "eval_rewards/rejected": -2.456965684890747, "eval_runtime": 2.5365, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 8600 }, { "epoch": 0.34, "learning_rate": 4.1440956224638186e-06, "logits/chosen": -3.035569667816162, "logits/rejected": -3.0624325275421143, "logps/chosen": -4.372953414916992, "logps/rejected": -273.8067321777344, "loss": 0.1134, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27554672956466675, "rewards/margins": 2.6553232669830322, "rewards/rejected": -2.3797767162323, "step": 8610 }, { "epoch": 0.34, "learning_rate": 4.141464389382392e-06, "logits/chosen": -3.0568175315856934, "logits/rejected": -3.0849342346191406, "logps/chosen": -2.846418857574463, "logps/rejected": -282.7684326171875, "loss": 0.0882, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29177147150039673, "rewards/margins": 2.754880428314209, "rewards/rejected": -2.463109016418457, "step": 8620 }, { "epoch": 0.35, "learning_rate": 4.138829956182144e-06, "logits/chosen": -2.9944357872009277, "logits/rejected": -3.0242042541503906, "logps/chosen": -4.046751022338867, "logps/rejected": -280.9881896972656, "loss": 0.1029, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27593377232551575, "rewards/margins": 2.728095531463623, "rewards/rejected": -2.4521615505218506, "step": 8630 }, { "epoch": 0.35, "learning_rate": 4.136192327999037e-06, "logits/chosen": -3.0027449131011963, "logits/rejected": -3.03151798248291, "logps/chosen": -6.866029262542725, "logps/rejected": -274.35028076171875, "loss": 0.1307, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24514810740947723, "rewards/margins": 2.642991542816162, "rewards/rejected": -2.397843360900879, "step": 8640 }, { "epoch": 0.35, "learning_rate": 4.133551509975264e-06, "logits/chosen": -3.0157954692840576, "logits/rejected": -3.0458555221557617, "logps/chosen": -5.915412425994873, "logps/rejected": -281.86541748046875, "loss": 0.1149, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25817734003067017, "rewards/margins": 2.7214605808258057, "rewards/rejected": -2.4632835388183594, "step": 8650 }, { "epoch": 0.35, "learning_rate": 4.130907507259233e-06, "logits/chosen": -3.033623456954956, "logits/rejected": -3.06449556350708, "logps/chosen": -0.19016405940055847, "logps/rejected": -286.12933349609375, "loss": 0.0618, "rewards/accuracies": 1.0, "rewards/chosen": 0.31424909830093384, "rewards/margins": 2.8213086128234863, "rewards/rejected": -2.5070595741271973, "step": 8660 }, { "epoch": 0.35, "learning_rate": 4.128260325005563e-06, "logits/chosen": -3.0328896045684814, "logits/rejected": -3.0647082328796387, "logps/chosen": -0.17532429099082947, "logps/rejected": -288.7752990722656, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.3160879611968994, "rewards/margins": 2.8451919555664062, "rewards/rejected": -2.529104232788086, "step": 8670 }, { "epoch": 0.35, "learning_rate": 4.125609968375073e-06, "logits/chosen": -3.054759979248047, "logits/rejected": -3.084221124649048, "logps/chosen": -3.7043919563293457, "logps/rejected": -284.87774658203125, "loss": 0.0924, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28131163120269775, "rewards/margins": 2.7708840370178223, "rewards/rejected": -2.489572048187256, "step": 8680 }, { "epoch": 0.35, "learning_rate": 4.122956442534765e-06, "logits/chosen": -3.0096027851104736, "logits/rejected": -3.041719675064087, "logps/chosen": -3.8049349784851074, "logps/rejected": -284.8905029296875, "loss": 0.0934, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.278511106967926, "rewards/margins": 2.7715234756469727, "rewards/rejected": -2.4930121898651123, "step": 8690 }, { "epoch": 0.35, "learning_rate": 4.120299752657828e-06, "logits/chosen": -3.0293784141540527, "logits/rejected": -3.058102607727051, "logps/chosen": -6.5447211265563965, "logps/rejected": -281.2080078125, "loss": 0.1212, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25127077102661133, "rewards/margins": 2.707335948944092, "rewards/rejected": -2.4560649394989014, "step": 8700 }, { "epoch": 0.35, "eval_logits/chosen": -3.0750112533569336, "eval_logits/rejected": -3.101567506790161, "eval_logps/chosen": -0.15089267492294312, "eval_logps/rejected": -283.2621154785156, "eval_loss": 0.06092270463705063, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150905966758728, "eval_rewards/margins": 2.7875561714172363, "eval_rewards/rejected": -2.4724655151367188, "eval_runtime": 2.5388, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 8700 }, { "epoch": 0.35, "learning_rate": 4.117639903923611e-06, "logits/chosen": -3.0167343616485596, "logits/rejected": -3.044999837875366, "logps/chosen": -3.6057350635528564, "logps/rejected": -284.56634521484375, "loss": 0.092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2801312804222107, "rewards/margins": 2.7702386379241943, "rewards/rejected": -2.490107297897339, "step": 8710 }, { "epoch": 0.35, "learning_rate": 4.114976901517628e-06, "logits/chosen": -3.005967617034912, "logits/rejected": -3.0326640605926514, "logps/chosen": -4.874532222747803, "logps/rejected": -277.74542236328125, "loss": 0.1136, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.268318772315979, "rewards/margins": 2.6889238357543945, "rewards/rejected": -2.420604705810547, "step": 8720 }, { "epoch": 0.35, "learning_rate": 4.1123107506315366e-06, "logits/chosen": -3.025261402130127, "logits/rejected": -3.0559487342834473, "logps/chosen": -0.4424453675746918, "logps/rejected": -284.0508728027344, "loss": 0.0654, "rewards/accuracies": 1.0, "rewards/chosen": 0.3130002021789551, "rewards/margins": 2.794659376144409, "rewards/rejected": -2.481658935546875, "step": 8730 }, { "epoch": 0.35, "learning_rate": 4.109641456463135e-06, "logits/chosen": -3.024473190307617, "logits/rejected": -3.051532506942749, "logps/chosen": -3.853530168533325, "logps/rejected": -286.0424499511719, "loss": 0.0926, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27820271253585815, "rewards/margins": 2.7857933044433594, "rewards/rejected": -2.5075907707214355, "step": 8740 }, { "epoch": 0.35, "learning_rate": 4.106969024216348e-06, "logits/chosen": -3.0160326957702637, "logits/rejected": -3.045384407043457, "logps/chosen": -3.0020909309387207, "logps/rejected": -278.9119567871094, "loss": 0.0906, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2870939373970032, "rewards/margins": 2.7199018001556396, "rewards/rejected": -2.4328079223632812, "step": 8750 }, { "epoch": 0.35, "learning_rate": 4.104293459101222e-06, "logits/chosen": -3.0087802410125732, "logits/rejected": -3.040501356124878, "logps/chosen": -4.0029802322387695, "logps/rejected": -285.57757568359375, "loss": 0.0932, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27606648206710815, "rewards/margins": 2.781075954437256, "rewards/rejected": -2.505009412765503, "step": 8760 }, { "epoch": 0.35, "learning_rate": 4.101614766333904e-06, "logits/chosen": -3.008749008178711, "logits/rejected": -3.0408074855804443, "logps/chosen": -3.5635809898376465, "logps/rejected": -286.2124938964844, "loss": 0.0899, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28111711144447327, "rewards/margins": 2.789407730102539, "rewards/rejected": -2.5082907676696777, "step": 8770 }, { "epoch": 0.35, "learning_rate": 4.0989329511366455e-06, "logits/chosen": -3.012439250946045, "logits/rejected": -3.042617082595825, "logps/chosen": -2.9187636375427246, "logps/rejected": -285.34454345703125, "loss": 0.0836, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2867857813835144, "rewards/margins": 2.786262273788452, "rewards/rejected": -2.499476432800293, "step": 8780 }, { "epoch": 0.35, "learning_rate": 4.096248018737781e-06, "logits/chosen": -3.0257749557495117, "logits/rejected": -3.052208423614502, "logps/chosen": -3.916616439819336, "logps/rejected": -285.1211242675781, "loss": 0.0964, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27588510513305664, "rewards/margins": 2.7796428203582764, "rewards/rejected": -2.503757953643799, "step": 8790 }, { "epoch": 0.35, "learning_rate": 4.093559974371725e-06, "logits/chosen": -3.0539979934692383, "logits/rejected": -3.084097385406494, "logps/chosen": -2.341153621673584, "logps/rejected": -288.9178161621094, "loss": 0.062, "rewards/accuracies": 1.0, "rewards/chosen": 0.2915419638156891, "rewards/margins": 2.8271737098693848, "rewards/rejected": -2.5356318950653076, "step": 8800 }, { "epoch": 0.35, "eval_logits/chosen": -3.0740058422088623, "eval_logits/rejected": -3.102139949798584, "eval_logps/chosen": -0.1799881011247635, "eval_logps/rejected": -283.4043884277344, "eval_loss": 0.060875050723552704, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31479963660240173, "eval_rewards/margins": 2.7886874675750732, "eval_rewards/rejected": -2.4738879203796387, "eval_runtime": 2.5358, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 8800 }, { "epoch": 0.35, "learning_rate": 4.090868823278956e-06, "logits/chosen": -3.0180397033691406, "logits/rejected": -3.0471231937408447, "logps/chosen": -2.37870192527771, "logps/rejected": -287.43829345703125, "loss": 0.0761, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29468420147895813, "rewards/margins": 2.818254232406616, "rewards/rejected": -2.5235702991485596, "step": 8810 }, { "epoch": 0.35, "learning_rate": 4.088174570706011e-06, "logits/chosen": -3.030181407928467, "logits/rejected": -3.0611469745635986, "logps/chosen": -2.105882406234741, "logps/rejected": -287.0720520019531, "loss": 0.0633, "rewards/accuracies": 1.0, "rewards/chosen": 0.29396456480026245, "rewards/margins": 2.813629627227783, "rewards/rejected": -2.519665002822876, "step": 8820 }, { "epoch": 0.35, "learning_rate": 4.0854772219054735e-06, "logits/chosen": -3.0283451080322266, "logits/rejected": -3.060281276702881, "logps/chosen": -3.433452606201172, "logps/rejected": -285.8135986328125, "loss": 0.0891, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2815479636192322, "rewards/margins": 2.786006450653076, "rewards/rejected": -2.504458427429199, "step": 8830 }, { "epoch": 0.35, "learning_rate": 4.082776782135964e-06, "logits/chosen": -3.0328171253204346, "logits/rejected": -3.062441825866699, "logps/chosen": -5.375236988067627, "logps/rejected": -287.2413024902344, "loss": 0.0911, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2628691792488098, "rewards/margins": 2.78450083732605, "rewards/rejected": -2.521632194519043, "step": 8840 }, { "epoch": 0.35, "learning_rate": 4.080073256662128e-06, "logits/chosen": -3.027712345123291, "logits/rejected": -3.0587103366851807, "logps/chosen": -9.363143920898438, "logps/rejected": -283.9390563964844, "loss": 0.1292, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.22450461983680725, "rewards/margins": 2.709766387939453, "rewards/rejected": -2.4852616786956787, "step": 8850 }, { "epoch": 0.35, "learning_rate": 4.077366650754624e-06, "logits/chosen": -3.0322697162628174, "logits/rejected": -3.061861515045166, "logps/chosen": -7.040236473083496, "logps/rejected": -283.0162658691406, "loss": 0.1239, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2504313588142395, "rewards/margins": 2.7251181602478027, "rewards/rejected": -2.474686861038208, "step": 8860 }, { "epoch": 0.35, "learning_rate": 4.074656969690122e-06, "logits/chosen": -3.0212206840515137, "logits/rejected": -3.0510997772216797, "logps/chosen": -11.200246810913086, "logps/rejected": -277.72308349609375, "loss": 0.1583, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20381513237953186, "rewards/margins": 2.6280155181884766, "rewards/rejected": -2.4242005348205566, "step": 8870 }, { "epoch": 0.36, "learning_rate": 4.071944218751283e-06, "logits/chosen": -3.0100715160369873, "logits/rejected": -3.04150390625, "logps/chosen": -0.2615768015384674, "logps/rejected": -289.7945861816406, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.3132995367050171, "rewards/margins": 2.8558740615844727, "rewards/rejected": -2.542574405670166, "step": 8880 }, { "epoch": 0.36, "learning_rate": 4.069228403226751e-06, "logits/chosen": -3.0136189460754395, "logits/rejected": -3.0465381145477295, "logps/chosen": -0.21107903122901917, "logps/rejected": -291.146484375, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.3163599669933319, "rewards/margins": 2.8714022636413574, "rewards/rejected": -2.555042266845703, "step": 8890 }, { "epoch": 0.36, "learning_rate": 4.066509528411151e-06, "logits/chosen": -3.0196681022644043, "logits/rejected": -3.0501480102539062, "logps/chosen": -9.33665657043457, "logps/rejected": -280.4502258300781, "loss": 0.1407, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22271688282489777, "rewards/margins": 2.6784987449645996, "rewards/rejected": -2.455782413482666, "step": 8900 }, { "epoch": 0.36, "eval_logits/chosen": -3.072763681411743, "eval_logits/rejected": -3.1023120880126953, "eval_logps/chosen": -0.1549394428730011, "eval_logps/rejected": -283.366943359375, "eval_loss": 0.06090143322944641, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150501549243927, "eval_rewards/margins": 2.7885639667510986, "eval_rewards/rejected": -2.473513603210449, "eval_runtime": 2.5372, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 8900 }, { "epoch": 0.36, "learning_rate": 4.063787599605068e-06, "logits/chosen": -3.0200061798095703, "logits/rejected": -3.048372745513916, "logps/chosen": -2.2564361095428467, "logps/rejected": -289.78277587890625, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": 0.2928429841995239, "rewards/margins": 2.841250419616699, "rewards/rejected": -2.5484073162078857, "step": 8910 }, { "epoch": 0.36, "learning_rate": 4.06106262211504e-06, "logits/chosen": -3.0366530418395996, "logits/rejected": -3.068213939666748, "logps/chosen": -1.6136109828948975, "logps/rejected": -287.15386962890625, "loss": 0.0675, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30161404609680176, "rewards/margins": 2.8143563270568848, "rewards/rejected": -2.512742280960083, "step": 8920 }, { "epoch": 0.36, "learning_rate": 4.058334601253551e-06, "logits/chosen": -3.003812789916992, "logits/rejected": -3.0342764854431152, "logps/chosen": -7.081292629241943, "logps/rejected": -281.1756896972656, "loss": 0.1221, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.245984748005867, "rewards/margins": 2.7055282592773438, "rewards/rejected": -2.4595437049865723, "step": 8930 }, { "epoch": 0.36, "learning_rate": 4.055603542339017e-06, "logits/chosen": -3.037954807281494, "logits/rejected": -3.067399024963379, "logps/chosen": -3.7737839221954346, "logps/rejected": -286.78106689453125, "loss": 0.0909, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2789372205734253, "rewards/margins": 2.7960543632507324, "rewards/rejected": -2.5171172618865967, "step": 8940 }, { "epoch": 0.36, "learning_rate": 4.052869450695776e-06, "logits/chosen": -3.0153093338012695, "logits/rejected": -3.0436344146728516, "logps/chosen": -0.2503168284893036, "logps/rejected": -291.42327880859375, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.315650999546051, "rewards/margins": 2.8717586994171143, "rewards/rejected": -2.556107759475708, "step": 8950 }, { "epoch": 0.36, "learning_rate": 4.050132331654082e-06, "logits/chosen": -3.0223050117492676, "logits/rejected": -3.0512633323669434, "logps/chosen": -3.936835527420044, "logps/rejected": -284.13018798828125, "loss": 0.0938, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27684348821640015, "rewards/margins": 2.7651312351226807, "rewards/rejected": -2.4882874488830566, "step": 8960 }, { "epoch": 0.36, "learning_rate": 4.047392190550087e-06, "logits/chosen": -3.0041027069091797, "logits/rejected": -3.034830331802368, "logps/chosen": -5.3209123611450195, "logps/rejected": -281.2002868652344, "loss": 0.0979, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26263895630836487, "rewards/margins": 2.722567081451416, "rewards/rejected": -2.459928512573242, "step": 8970 }, { "epoch": 0.36, "learning_rate": 4.044649032725836e-06, "logits/chosen": -3.010528087615967, "logits/rejected": -3.0405995845794678, "logps/chosen": -3.8588359355926514, "logps/rejected": -285.48846435546875, "loss": 0.0929, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27792760729789734, "rewards/margins": 2.7791383266448975, "rewards/rejected": -2.5012106895446777, "step": 8980 }, { "epoch": 0.36, "learning_rate": 4.041902863529257e-06, "logits/chosen": -3.018022060394287, "logits/rejected": -3.050459861755371, "logps/chosen": -0.24496333301067352, "logps/rejected": -288.51611328125, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.3107416033744812, "rewards/margins": 2.850687026977539, "rewards/rejected": -2.539945602416992, "step": 8990 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -3.0306644439697266, "logits/rejected": -3.0594818592071533, "logps/chosen": -2.067131757736206, "logps/rejected": -290.1772155761719, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": 0.2971288859844208, "rewards/margins": 2.841440200805664, "rewards/rejected": -2.544311285018921, "step": 9000 }, { "epoch": 0.36, "eval_logits/chosen": -3.0702552795410156, "eval_logits/rejected": -3.1002700328826904, "eval_logps/chosen": -0.15854749083518982, "eval_logps/rejected": -283.95770263671875, "eval_loss": 0.06045776605606079, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150140345096588, "eval_rewards/margins": 2.7944350242614746, "eval_rewards/rejected": -2.4794211387634277, "eval_runtime": 2.5389, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 9000 }, { "epoch": 0.36, "learning_rate": 4.036401512440161e-06, "logits/chosen": -3.0123820304870605, "logits/rejected": -3.044328212738037, "logps/chosen": -3.2765164375305176, "logps/rejected": -285.1115417480469, "loss": 0.0874, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28319182991981506, "rewards/margins": 2.782421588897705, "rewards/rejected": -2.4992294311523438, "step": 9010 }, { "epoch": 0.36, "learning_rate": 4.033646341272811e-06, "logits/chosen": -3.0244317054748535, "logits/rejected": -3.0560832023620605, "logps/chosen": -1.4219744205474854, "logps/rejected": -290.03692626953125, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/chosen": 0.30339470505714417, "rewards/margins": 2.8491501808166504, "rewards/rejected": -2.54575514793396, "step": 9020 }, { "epoch": 0.36, "learning_rate": 4.030888180183442e-06, "logits/chosen": -3.0249876976013184, "logits/rejected": -3.057555675506592, "logps/chosen": -0.2186092883348465, "logps/rejected": -289.8997802734375, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3160155415534973, "rewards/margins": 2.8570868968963623, "rewards/rejected": -2.5410714149475098, "step": 9030 }, { "epoch": 0.36, "learning_rate": 4.02812703454923e-06, "logits/chosen": -3.025486469268799, "logits/rejected": -3.0586891174316406, "logps/chosen": -4.600053310394287, "logps/rejected": -281.52374267578125, "loss": 0.102, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27088290452957153, "rewards/margins": 2.733848810195923, "rewards/rejected": -2.462965726852417, "step": 9040 }, { "epoch": 0.36, "learning_rate": 4.02536290975317e-06, "logits/chosen": -3.0071072578430176, "logits/rejected": -3.039186477661133, "logps/chosen": -0.35776105523109436, "logps/rejected": -288.5356140136719, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": 0.31157225370407104, "rewards/margins": 2.8443799018859863, "rewards/rejected": -2.5328075885772705, "step": 9050 }, { "epoch": 0.36, "learning_rate": 4.022595811184064e-06, "logits/chosen": -3.0443942546844482, "logits/rejected": -3.0747833251953125, "logps/chosen": -0.2996397316455841, "logps/rejected": -290.9875793457031, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.31302300095558167, "rewards/margins": 2.8734683990478516, "rewards/rejected": -2.5604453086853027, "step": 9060 }, { "epoch": 0.36, "learning_rate": 4.019825744236514e-06, "logits/chosen": -3.0210883617401123, "logits/rejected": -3.053675413131714, "logps/chosen": -3.1654045581817627, "logps/rejected": -283.96356201171875, "loss": 0.0783, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.286098837852478, "rewards/margins": 2.7694783210754395, "rewards/rejected": -2.483379602432251, "step": 9070 }, { "epoch": 0.36, "learning_rate": 4.017052714310906e-06, "logits/chosen": -3.0339643955230713, "logits/rejected": -3.06463360786438, "logps/chosen": -1.3188668489456177, "logps/rejected": -289.95953369140625, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": 0.30397334694862366, "rewards/margins": 2.8462367057800293, "rewards/rejected": -2.5422632694244385, "step": 9080 }, { "epoch": 0.36, "learning_rate": 4.014276726813404e-06, "logits/chosen": -3.0092575550079346, "logits/rejected": -3.037255048751831, "logps/chosen": -4.7793169021606445, "logps/rejected": -283.03192138671875, "loss": 0.0933, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2717859745025635, "rewards/margins": 2.741720199584961, "rewards/rejected": -2.4699342250823975, "step": 9090 }, { "epoch": 0.36, "learning_rate": 4.011497787155938e-06, "logits/chosen": -3.021306276321411, "logits/rejected": -3.051379680633545, "logps/chosen": -0.4507691264152527, "logps/rejected": -286.0801696777344, "loss": 0.065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3134113848209381, "rewards/margins": 2.816711902618408, "rewards/rejected": -2.503300189971924, "step": 9100 }, { "epoch": 0.36, "eval_logits/chosen": -3.0733325481414795, "eval_logits/rejected": -3.1034600734710693, "eval_logps/chosen": -0.1291310340166092, "eval_logps/rejected": -284.04071044921875, "eval_loss": 0.06040976569056511, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31530818343162537, "eval_rewards/margins": 2.7955596446990967, "eval_rewards/rejected": -2.4802517890930176, "eval_runtime": 2.5326, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.395, "step": 9100 }, { "epoch": 0.36, "learning_rate": 4.008715900756192e-06, "logits/chosen": -3.0409793853759766, "logits/rejected": -3.0701375007629395, "logps/chosen": -3.8987255096435547, "logps/rejected": -288.0821228027344, "loss": 0.0917, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27975931763648987, "rewards/margins": 2.8029918670654297, "rewards/rejected": -2.523232936859131, "step": 9110 }, { "epoch": 0.36, "learning_rate": 4.005931073037597e-06, "logits/chosen": -3.0415525436401367, "logits/rejected": -3.0703537464141846, "logps/chosen": -0.22415503859519958, "logps/rejected": -290.8861083984375, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3174096643924713, "rewards/margins": 2.8657264709472656, "rewards/rejected": -2.548316717147827, "step": 9120 }, { "epoch": 0.37, "learning_rate": 4.003143309429317e-06, "logits/chosen": -3.027820587158203, "logits/rejected": -3.0581212043762207, "logps/chosen": -3.68280029296875, "logps/rejected": -285.29302978515625, "loss": 0.0917, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.278579980134964, "rewards/margins": 2.780479907989502, "rewards/rejected": -2.5019004344940186, "step": 9130 }, { "epoch": 0.37, "learning_rate": 4.000352615366239e-06, "logits/chosen": -3.0407252311706543, "logits/rejected": -3.071312189102173, "logps/chosen": -5.659231185913086, "logps/rejected": -282.3970031738281, "loss": 0.1119, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2612738013267517, "rewards/margins": 2.729611873626709, "rewards/rejected": -2.4683384895324707, "step": 9140 }, { "epoch": 0.37, "learning_rate": 3.997558996288965e-06, "logits/chosen": -3.0143682956695557, "logits/rejected": -3.0464630126953125, "logps/chosen": -3.162733554840088, "logps/rejected": -287.09271240234375, "loss": 0.0855, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28733623027801514, "rewards/margins": 2.8022000789642334, "rewards/rejected": -2.5148637294769287, "step": 9150 }, { "epoch": 0.37, "learning_rate": 3.9947624576437975e-06, "logits/chosen": -3.0171642303466797, "logits/rejected": -3.047095537185669, "logps/chosen": -3.38093900680542, "logps/rejected": -283.604736328125, "loss": 0.0915, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28531140089035034, "rewards/margins": 2.7667860984802246, "rewards/rejected": -2.4814748764038086, "step": 9160 }, { "epoch": 0.37, "learning_rate": 3.991963004882732e-06, "logits/chosen": -3.0509161949157715, "logits/rejected": -3.0816895961761475, "logps/chosen": -0.3160349428653717, "logps/rejected": -288.79107666015625, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.31164485216140747, "rewards/margins": 2.8497283458709717, "rewards/rejected": -2.53808331489563, "step": 9170 }, { "epoch": 0.37, "learning_rate": 3.989160643463446e-06, "logits/chosen": -3.000516891479492, "logits/rejected": -3.0329086780548096, "logps/chosen": -0.25152871012687683, "logps/rejected": -290.1000061035156, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.3135625720024109, "rewards/margins": 2.8583266735076904, "rewards/rejected": -2.5447640419006348, "step": 9180 }, { "epoch": 0.37, "learning_rate": 3.986355378849284e-06, "logits/chosen": -3.027947187423706, "logits/rejected": -3.0566115379333496, "logps/chosen": -3.7503440380096436, "logps/rejected": -288.0802307128906, "loss": 0.0903, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28228679299354553, "rewards/margins": 2.806100368499756, "rewards/rejected": -2.5238137245178223, "step": 9190 }, { "epoch": 0.37, "learning_rate": 3.983547216509254e-06, "logits/chosen": -3.0277178287506104, "logits/rejected": -3.0589797496795654, "logps/chosen": -3.7195651531219482, "logps/rejected": -283.25518798828125, "loss": 0.0942, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28250402212142944, "rewards/margins": 2.756988525390625, "rewards/rejected": -2.474484443664551, "step": 9200 }, { "epoch": 0.37, "eval_logits/chosen": -3.075105905532837, "eval_logits/rejected": -3.1043105125427246, "eval_logps/chosen": -0.14469298720359802, "eval_logps/rejected": -283.8191223144531, "eval_loss": 0.06051688268780708, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151525855064392, "eval_rewards/margins": 2.7931880950927734, "eval_rewards/rejected": -2.4780354499816895, "eval_runtime": 2.5381, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 9200 }, { "epoch": 0.37, "learning_rate": 3.980736161918013e-06, "logits/chosen": -3.0197224617004395, "logits/rejected": -3.0496859550476074, "logps/chosen": -0.25036340951919556, "logps/rejected": -290.3255310058594, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.31573542952537537, "rewards/margins": 2.8647637367248535, "rewards/rejected": -2.549028158187866, "step": 9210 }, { "epoch": 0.37, "learning_rate": 3.977922220555855e-06, "logits/chosen": -3.0480878353118896, "logits/rejected": -3.0759596824645996, "logps/chosen": -0.26382502913475037, "logps/rejected": -288.5233154296875, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148043751716614, "rewards/margins": 2.8438637256622314, "rewards/rejected": -2.5290591716766357, "step": 9220 }, { "epoch": 0.37, "learning_rate": 3.975105397908703e-06, "logits/chosen": -3.0239810943603516, "logits/rejected": -3.053370952606201, "logps/chosen": -0.2275981605052948, "logps/rejected": -287.86602783203125, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 0.3158736526966095, "rewards/margins": 2.8367772102355957, "rewards/rejected": -2.5209033489227295, "step": 9230 }, { "epoch": 0.37, "learning_rate": 3.972285699468097e-06, "logits/chosen": -3.011216640472412, "logits/rejected": -3.043971061706543, "logps/chosen": -0.23796196281909943, "logps/rejected": -291.56298828125, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3136194348335266, "rewards/margins": 2.8755526542663574, "rewards/rejected": -2.5619332790374756, "step": 9240 }, { "epoch": 0.37, "learning_rate": 3.969463130731183e-06, "logits/chosen": -3.0087056159973145, "logits/rejected": -3.038595199584961, "logps/chosen": -5.572820663452148, "logps/rejected": -283.7800598144531, "loss": 0.1076, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26063448190689087, "rewards/margins": 2.744101047515869, "rewards/rejected": -2.483466863632202, "step": 9250 }, { "epoch": 0.37, "learning_rate": 3.966637697200704e-06, "logits/chosen": -3.0549023151397705, "logits/rejected": -3.0826375484466553, "logps/chosen": -3.946232557296753, "logps/rejected": -284.52752685546875, "loss": 0.0928, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2783627510070801, "rewards/margins": 2.7743422985076904, "rewards/rejected": -2.4959795475006104, "step": 9260 }, { "epoch": 0.37, "learning_rate": 3.963809404384986e-06, "logits/chosen": -3.005295753479004, "logits/rejected": -3.0382018089294434, "logps/chosen": -0.5083450078964233, "logps/rejected": -287.57843017578125, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": 0.30989205837249756, "rewards/margins": 2.834585189819336, "rewards/rejected": -2.524693012237549, "step": 9270 }, { "epoch": 0.37, "learning_rate": 3.9609782577979305e-06, "logits/chosen": -3.0146427154541016, "logits/rejected": -3.044870615005493, "logps/chosen": -0.23693056404590607, "logps/rejected": -290.50677490234375, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3133693039417267, "rewards/margins": 2.865630865097046, "rewards/rejected": -2.5522613525390625, "step": 9280 }, { "epoch": 0.37, "learning_rate": 3.958144262959004e-06, "logits/chosen": -3.0142149925231934, "logits/rejected": -3.045680522918701, "logps/chosen": -0.4789581298828125, "logps/rejected": -288.4486389160156, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 0.3120472729206085, "rewards/margins": 2.843381643295288, "rewards/rejected": -2.531334400177002, "step": 9290 }, { "epoch": 0.37, "learning_rate": 3.955307425393224e-06, "logits/chosen": -3.0034890174865723, "logits/rejected": -3.036752700805664, "logps/chosen": -3.4521400928497314, "logps/rejected": -285.7259826660156, "loss": 0.0863, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2838490605354309, "rewards/margins": 2.7842283248901367, "rewards/rejected": -2.5003793239593506, "step": 9300 }, { "epoch": 0.37, "eval_logits/chosen": -3.071390151977539, "eval_logits/rejected": -3.101571559906006, "eval_logps/chosen": -0.14110131561756134, "eval_logps/rejected": -283.5404357910156, "eval_loss": 0.06079016998410225, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151884973049164, "eval_rewards/margins": 2.7904369831085205, "eval_rewards/rejected": -2.4752485752105713, "eval_runtime": 2.5301, "eval_samples_per_second": 1.976, "eval_steps_per_second": 0.395, "step": 9300 }, { "epoch": 0.37, "learning_rate": 3.9524677506311505e-06, "logits/chosen": -3.0512688159942627, "logits/rejected": -3.0806357860565186, "logps/chosen": -3.8408589363098145, "logps/rejected": -287.3253173828125, "loss": 0.0916, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27803999185562134, "rewards/margins": 2.796814441680908, "rewards/rejected": -2.5187742710113525, "step": 9310 }, { "epoch": 0.37, "learning_rate": 3.949625244208873e-06, "logits/chosen": -3.0251386165618896, "logits/rejected": -3.057833194732666, "logps/chosen": -3.879056215286255, "logps/rejected": -288.7135314941406, "loss": 0.0907, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2807185649871826, "rewards/margins": 2.8130502700805664, "rewards/rejected": -2.532331943511963, "step": 9320 }, { "epoch": 0.37, "learning_rate": 3.946779911668006e-06, "logits/chosen": -3.0434703826904297, "logits/rejected": -3.0767745971679688, "logps/chosen": -0.2448156774044037, "logps/rejected": -289.400390625, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": 0.3158368468284607, "rewards/margins": 2.8508100509643555, "rewards/rejected": -2.534972906112671, "step": 9330 }, { "epoch": 0.37, "learning_rate": 3.943931758555669e-06, "logits/chosen": -3.0118002891540527, "logits/rejected": -3.044593334197998, "logps/chosen": -3.5196170806884766, "logps/rejected": -286.48687744140625, "loss": 0.0891, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2785336673259735, "rewards/margins": 2.7964463233947754, "rewards/rejected": -2.5179126262664795, "step": 9340 }, { "epoch": 0.37, "learning_rate": 3.941080790424483e-06, "logits/chosen": -3.016038417816162, "logits/rejected": -3.0480971336364746, "logps/chosen": -0.23875954747200012, "logps/rejected": -291.549072265625, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.3155813217163086, "rewards/margins": 2.8733644485473633, "rewards/rejected": -2.5577831268310547, "step": 9350 }, { "epoch": 0.37, "learning_rate": 3.938227012832557e-06, "logits/chosen": -3.013118267059326, "logits/rejected": -3.0453178882598877, "logps/chosen": -3.0232434272766113, "logps/rejected": -287.3465270996094, "loss": 0.0786, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28672534227371216, "rewards/margins": 2.8070271015167236, "rewards/rejected": -2.520301342010498, "step": 9360 }, { "epoch": 0.37, "learning_rate": 3.935370431343475e-06, "logits/chosen": -3.0455799102783203, "logits/rejected": -3.0728869438171387, "logps/chosen": -2.892730236053467, "logps/rejected": -288.11224365234375, "loss": 0.0817, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2887338697910309, "rewards/margins": 2.812598466873169, "rewards/rejected": -2.523864269256592, "step": 9370 }, { "epoch": 0.38, "learning_rate": 3.932511051526289e-06, "logits/chosen": -3.0419929027557373, "logits/rejected": -3.073577642440796, "logps/chosen": -1.1974107027053833, "logps/rejected": -286.02178955078125, "loss": 0.0679, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30409783124923706, "rewards/margins": 2.812343120574951, "rewards/rejected": -2.5082454681396484, "step": 9380 }, { "epoch": 0.38, "learning_rate": 3.929648878955507e-06, "logits/chosen": -3.0316154956817627, "logits/rejected": -3.0626723766326904, "logps/chosen": -3.8625781536102295, "logps/rejected": -288.135498046875, "loss": 0.0915, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27924492955207825, "rewards/margins": 2.802497625350952, "rewards/rejected": -2.5232529640197754, "step": 9390 }, { "epoch": 0.38, "learning_rate": 3.92678391921108e-06, "logits/chosen": -3.009316921234131, "logits/rejected": -3.04038143157959, "logps/chosen": -3.2190327644348145, "logps/rejected": -289.1029357910156, "loss": 0.084, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28173762559890747, "rewards/margins": 2.8257923126220703, "rewards/rejected": -2.5440547466278076, "step": 9400 }, { "epoch": 0.38, "eval_logits/chosen": -3.0725579261779785, "eval_logits/rejected": -3.100677013397217, "eval_logps/chosen": -0.1348893940448761, "eval_logps/rejected": -283.83428955078125, "eval_loss": 0.06058691069483757, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3152506351470947, "eval_rewards/margins": 2.7934377193450928, "eval_rewards/rejected": -2.478187084197998, "eval_runtime": 2.5357, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 9400 }, { "epoch": 0.38, "learning_rate": 3.923916177878394e-06, "logits/chosen": -3.011488199234009, "logits/rejected": -3.041553497314453, "logps/chosen": -6.1088666915893555, "logps/rejected": -283.27984619140625, "loss": 0.1039, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25513511896133423, "rewards/margins": 2.7360153198242188, "rewards/rejected": -2.48088002204895, "step": 9410 }, { "epoch": 0.38, "learning_rate": 3.921045660548258e-06, "logits/chosen": -3.0007545948028564, "logits/rejected": -3.0328445434570312, "logps/chosen": -3.1441314220428467, "logps/rejected": -288.3407287597656, "loss": 0.0834, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28607338666915894, "rewards/margins": 2.814926862716675, "rewards/rejected": -2.528853178024292, "step": 9420 }, { "epoch": 0.38, "learning_rate": 3.918172372816892e-06, "logits/chosen": -3.028923749923706, "logits/rejected": -3.0602505207061768, "logps/chosen": -6.645760536193848, "logps/rejected": -286.02264404296875, "loss": 0.1035, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25030502676963806, "rewards/margins": 2.753694534301758, "rewards/rejected": -2.503389596939087, "step": 9430 }, { "epoch": 0.38, "learning_rate": 3.915296320285917e-06, "logits/chosen": -3.0360124111175537, "logits/rejected": -3.066115379333496, "logps/chosen": -0.27737969160079956, "logps/rejected": -290.28314208984375, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.31571778655052185, "rewards/margins": 2.8624064922332764, "rewards/rejected": -2.5466887950897217, "step": 9440 }, { "epoch": 0.38, "learning_rate": 3.912417508562345e-06, "logits/chosen": -3.0418543815612793, "logits/rejected": -3.069399833679199, "logps/chosen": -0.289710134267807, "logps/rejected": -290.09954833984375, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.3141762912273407, "rewards/margins": 2.859255075454712, "rewards/rejected": -2.545078754425049, "step": 9450 }, { "epoch": 0.38, "learning_rate": 3.909535943258567e-06, "logits/chosen": -3.031963348388672, "logits/rejected": -3.0657339096069336, "logps/chosen": -0.220443457365036, "logps/rejected": -293.03936767578125, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 0.316396564245224, "rewards/margins": 2.89386248588562, "rewards/rejected": -2.5774660110473633, "step": 9460 }, { "epoch": 0.38, "learning_rate": 3.906651629992342e-06, "logits/chosen": -3.0285208225250244, "logits/rejected": -3.0577635765075684, "logps/chosen": -3.8675167560577393, "logps/rejected": -287.36456298828125, "loss": 0.0918, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27920758724212646, "rewards/margins": 2.7986207008361816, "rewards/rejected": -2.5194132328033447, "step": 9470 }, { "epoch": 0.38, "learning_rate": 3.903764574386786e-06, "logits/chosen": -3.007336378097534, "logits/rejected": -3.0406665802001953, "logps/chosen": -3.8540542125701904, "logps/rejected": -289.4119873046875, "loss": 0.0908, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2806088924407959, "rewards/margins": 2.8158068656921387, "rewards/rejected": -2.535198450088501, "step": 9480 }, { "epoch": 0.38, "learning_rate": 3.900874782070362e-06, "logits/chosen": -3.0196337699890137, "logits/rejected": -3.049776554107666, "logps/chosen": -2.692213535308838, "logps/rejected": -288.8630676269531, "loss": 0.0706, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2888302803039551, "rewards/margins": 2.8215436935424805, "rewards/rejected": -2.5327134132385254, "step": 9490 }, { "epoch": 0.38, "learning_rate": 3.897982258676867e-06, "logits/chosen": -3.0114104747772217, "logits/rejected": -3.043062210083008, "logps/chosen": -7.017190456390381, "logps/rejected": -282.20025634765625, "loss": 0.1181, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24719317257404327, "rewards/margins": 2.713496685028076, "rewards/rejected": -2.466303586959839, "step": 9500 }, { "epoch": 0.38, "eval_logits/chosen": -3.0748300552368164, "eval_logits/rejected": -3.103224992752075, "eval_logps/chosen": -0.15623719990253448, "eval_logps/rejected": -283.99310302734375, "eval_loss": 0.060487378388643265, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31503716111183167, "eval_rewards/margins": 2.794811964035034, "eval_rewards/rejected": -2.4797751903533936, "eval_runtime": 2.533, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.395, "step": 9500 }, { "epoch": 0.38, "learning_rate": 3.895087009845425e-06, "logits/chosen": -3.025664806365967, "logits/rejected": -3.056877613067627, "logps/chosen": -0.5490191578865051, "logps/rejected": -288.6778564453125, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": 0.3107181191444397, "rewards/margins": 2.844878673553467, "rewards/rejected": -2.534160614013672, "step": 9510 }, { "epoch": 0.38, "learning_rate": 3.89218904122047e-06, "logits/chosen": -3.0124378204345703, "logits/rejected": -3.0447397232055664, "logps/chosen": -0.2121623307466507, "logps/rejected": -291.0927429199219, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.31540781259536743, "rewards/margins": 2.86824107170105, "rewards/rejected": -2.552833318710327, "step": 9520 }, { "epoch": 0.38, "learning_rate": 3.889288358451742e-06, "logits/chosen": -3.0298287868499756, "logits/rejected": -3.059689998626709, "logps/chosen": -0.15787872672080994, "logps/rejected": -289.70562744140625, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31696218252182007, "rewards/margins": 2.858846664428711, "rewards/rejected": -2.541884422302246, "step": 9530 }, { "epoch": 0.38, "learning_rate": 3.886384967194269e-06, "logits/chosen": -3.021937131881714, "logits/rejected": -3.054514169692993, "logps/chosen": -3.577822208404541, "logps/rejected": -286.1370544433594, "loss": 0.0773, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27994492650032043, "rewards/margins": 2.7852511405944824, "rewards/rejected": -2.5053064823150635, "step": 9540 }, { "epoch": 0.38, "learning_rate": 3.88347887310836e-06, "logits/chosen": -3.0249972343444824, "logits/rejected": -3.054222583770752, "logps/chosen": -1.0822827816009521, "logps/rejected": -289.9466552734375, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.3052082061767578, "rewards/margins": 2.8520901203155518, "rewards/rejected": -2.546882152557373, "step": 9550 }, { "epoch": 0.38, "learning_rate": 3.880570081859597e-06, "logits/chosen": -3.013491153717041, "logits/rejected": -3.0457816123962402, "logps/chosen": -3.8939476013183594, "logps/rejected": -288.28106689453125, "loss": 0.0913, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2791001498699188, "rewards/margins": 2.8065555095672607, "rewards/rejected": -2.5274555683135986, "step": 9560 }, { "epoch": 0.38, "learning_rate": 3.8776585991188154e-06, "logits/chosen": -3.032379627227783, "logits/rejected": -3.0627143383026123, "logps/chosen": -0.22015085816383362, "logps/rejected": -289.6267395019531, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.31512099504470825, "rewards/margins": 2.8549516201019287, "rewards/rejected": -2.5398306846618652, "step": 9570 }, { "epoch": 0.38, "learning_rate": 3.8747444305621e-06, "logits/chosen": -3.0215344429016113, "logits/rejected": -3.0524520874023438, "logps/chosen": -0.24648408591747284, "logps/rejected": -291.2984313964844, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.31399619579315186, "rewards/margins": 2.8765456676483154, "rewards/rejected": -2.562549591064453, "step": 9580 }, { "epoch": 0.38, "learning_rate": 3.871827581870772e-06, "logits/chosen": -3.000330924987793, "logits/rejected": -3.034811496734619, "logps/chosen": -2.860887050628662, "logps/rejected": -290.2077941894531, "loss": 0.0713, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28863397240638733, "rewards/margins": 2.838409423828125, "rewards/rejected": -2.5497756004333496, "step": 9590 }, { "epoch": 0.38, "learning_rate": 3.868908058731376e-06, "logits/chosen": -3.0141167640686035, "logits/rejected": -3.044461250305176, "logps/chosen": -6.948902130126953, "logps/rejected": -283.1720275878906, "loss": 0.1231, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24775472283363342, "rewards/margins": 2.725048780441284, "rewards/rejected": -2.4772942066192627, "step": 9600 }, { "epoch": 0.38, "eval_logits/chosen": -3.072549819946289, "eval_logits/rejected": -3.100886344909668, "eval_logps/chosen": -0.1812925636768341, "eval_logps/rejected": -284.30572509765625, "eval_loss": 0.06026478484272957, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147866129875183, "eval_rewards/margins": 2.7976882457733154, "eval_rewards/rejected": -2.4829015731811523, "eval_runtime": 2.53, "eval_samples_per_second": 1.976, "eval_steps_per_second": 0.395, "step": 9600 }, { "epoch": 0.38, "learning_rate": 3.8659858668356735e-06, "logits/chosen": -3.0153510570526123, "logits/rejected": -3.0452969074249268, "logps/chosen": -1.2613276243209839, "logps/rejected": -292.9415283203125, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.30482062697410583, "rewards/margins": 2.881582736968994, "rewards/rejected": -2.5767617225646973, "step": 9610 }, { "epoch": 0.38, "learning_rate": 3.863061011880626e-06, "logits/chosen": -3.0376129150390625, "logits/rejected": -3.070037364959717, "logps/chosen": -0.19538690149784088, "logps/rejected": -290.0599365234375, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3165472447872162, "rewards/margins": 2.8602142333984375, "rewards/rejected": -2.5436670780181885, "step": 9620 }, { "epoch": 0.39, "learning_rate": 3.860133499568387e-06, "logits/chosen": -3.005978584289551, "logits/rejected": -3.040675163269043, "logps/chosen": -2.7413995265960693, "logps/rejected": -286.4681396484375, "loss": 0.0818, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2865421175956726, "rewards/margins": 2.801596164703369, "rewards/rejected": -2.5150537490844727, "step": 9630 }, { "epoch": 0.39, "learning_rate": 3.857203335606294e-06, "logits/chosen": -3.0058655738830566, "logits/rejected": -3.042450189590454, "logps/chosen": -3.3411026000976562, "logps/rejected": -286.6805725097656, "loss": 0.0888, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.285393625497818, "rewards/margins": 2.7963271141052246, "rewards/rejected": -2.5109336376190186, "step": 9640 }, { "epoch": 0.39, "learning_rate": 3.85427052570685e-06, "logits/chosen": -3.03371000289917, "logits/rejected": -3.064008951187134, "logps/chosen": -0.19248969852924347, "logps/rejected": -291.8348083496094, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.3160797655582428, "rewards/margins": 2.880077600479126, "rewards/rejected": -2.563997983932495, "step": 9650 }, { "epoch": 0.39, "learning_rate": 3.851335075587717e-06, "logits/chosen": -3.025513172149658, "logits/rejected": -3.056286334991455, "logps/chosen": -3.845834255218506, "logps/rejected": -288.3327941894531, "loss": 0.0913, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27865535020828247, "rewards/margins": 2.807466506958008, "rewards/rejected": -2.528810977935791, "step": 9660 }, { "epoch": 0.39, "learning_rate": 3.848396990971709e-06, "logits/chosen": -3.0277726650238037, "logits/rejected": -3.059771776199341, "logps/chosen": -4.007667064666748, "logps/rejected": -285.5321350097656, "loss": 0.0935, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27839675545692444, "rewards/margins": 2.777935743331909, "rewards/rejected": -2.4995391368865967, "step": 9670 }, { "epoch": 0.39, "learning_rate": 3.845456277586768e-06, "logits/chosen": -3.0311837196350098, "logits/rejected": -3.0658819675445557, "logps/chosen": -0.2778701186180115, "logps/rejected": -291.1576232910156, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138524889945984, "rewards/margins": 2.8700757026672363, "rewards/rejected": -2.5562233924865723, "step": 9680 }, { "epoch": 0.39, "learning_rate": 3.842512941165968e-06, "logits/chosen": -3.008892774581909, "logits/rejected": -3.0412583351135254, "logps/chosen": -1.5533616542816162, "logps/rejected": -285.98455810546875, "loss": 0.0718, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30013003945350647, "rewards/margins": 2.8068063259124756, "rewards/rejected": -2.506676197052002, "step": 9690 }, { "epoch": 0.39, "learning_rate": 3.839566987447492e-06, "logits/chosen": -3.016157388687134, "logits/rejected": -3.048293352127075, "logps/chosen": -6.482266426086426, "logps/rejected": -283.84088134765625, "loss": 0.1187, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2524487376213074, "rewards/margins": 2.7371134757995605, "rewards/rejected": -2.4846649169921875, "step": 9700 }, { "epoch": 0.39, "eval_logits/chosen": -3.0728352069854736, "eval_logits/rejected": -3.1022584438323975, "eval_logps/chosen": -0.14741182327270508, "eval_logps/rejected": -284.14630126953125, "eval_loss": 0.0603378601372242, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31512540578842163, "eval_rewards/margins": 2.7964327335357666, "eval_rewards/rejected": -2.4813072681427, "eval_runtime": 2.5374, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 9700 }, { "epoch": 0.39, "learning_rate": 3.8366184221746285e-06, "logits/chosen": -3.0271379947662354, "logits/rejected": -3.0580990314483643, "logps/chosen": -0.21563836932182312, "logps/rejected": -287.5446472167969, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": 0.3145095705986023, "rewards/margins": 2.8352723121643066, "rewards/rejected": -2.5207626819610596, "step": 9710 }, { "epoch": 0.39, "learning_rate": 3.833667251095757e-06, "logits/chosen": -3.0197415351867676, "logits/rejected": -3.049327850341797, "logps/chosen": -0.2288031131029129, "logps/rejected": -292.3929443359375, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149993419647217, "rewards/margins": 2.8853135108947754, "rewards/rejected": -2.570314407348633, "step": 9720 }, { "epoch": 0.39, "learning_rate": 3.830713479964335e-06, "logits/chosen": -3.0291335582733154, "logits/rejected": -3.0587282180786133, "logps/chosen": -7.219078063964844, "logps/rejected": -282.136474609375, "loss": 0.1265, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24395711719989777, "rewards/margins": 2.712543487548828, "rewards/rejected": -2.4685862064361572, "step": 9730 }, { "epoch": 0.39, "learning_rate": 3.827757114538892e-06, "logits/chosen": -3.047727108001709, "logits/rejected": -3.0764245986938477, "logps/chosen": -4.855786323547363, "logps/rejected": -286.0008850097656, "loss": 0.0789, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.267627477645874, "rewards/margins": 2.7735326290130615, "rewards/rejected": -2.5059051513671875, "step": 9740 }, { "epoch": 0.39, "learning_rate": 3.824798160583012e-06, "logits/chosen": -3.0326461791992188, "logits/rejected": -3.066193103790283, "logps/chosen": -0.24766568839550018, "logps/rejected": -291.04229736328125, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3145206570625305, "rewards/margins": 2.8680567741394043, "rewards/rejected": -2.5535366535186768, "step": 9750 }, { "epoch": 0.39, "learning_rate": 3.82183662386533e-06, "logits/chosen": -3.0313305854797363, "logits/rejected": -3.061187982559204, "logps/chosen": -10.85000228881836, "logps/rejected": -276.90472412109375, "loss": 0.1548, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2102328985929489, "rewards/margins": 2.623755693435669, "rewards/rejected": -2.4135231971740723, "step": 9760 }, { "epoch": 0.39, "learning_rate": 3.8188725101595094e-06, "logits/chosen": -3.0143818855285645, "logits/rejected": -3.0448157787323, "logps/chosen": -9.608235359191895, "logps/rejected": -283.01483154296875, "loss": 0.1367, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22053857147693634, "rewards/margins": 2.6979103088378906, "rewards/rejected": -2.4773716926574707, "step": 9770 }, { "epoch": 0.39, "learning_rate": 3.815905825244245e-06, "logits/chosen": -3.047929048538208, "logits/rejected": -3.0760128498077393, "logps/chosen": -8.259798049926758, "logps/rejected": -280.7403869628906, "loss": 0.1201, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23300810158252716, "rewards/margins": 2.690802574157715, "rewards/rejected": -2.457794189453125, "step": 9780 }, { "epoch": 0.39, "learning_rate": 3.8129365749032398e-06, "logits/chosen": -3.0240702629089355, "logits/rejected": -3.055184841156006, "logps/chosen": -3.81213641166687, "logps/rejected": -285.26953125, "loss": 0.0693, "rewards/accuracies": 1.0, "rewards/chosen": 0.2785521149635315, "rewards/margins": 2.777909517288208, "rewards/rejected": -2.499357223510742, "step": 9790 }, { "epoch": 0.39, "learning_rate": 3.8099647649251984e-06, "logits/chosen": -3.0308194160461426, "logits/rejected": -3.059825897216797, "logps/chosen": -11.198949813842773, "logps/rejected": -276.3506774902344, "loss": 0.1663, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.20539633929729462, "rewards/margins": 2.6174960136413574, "rewards/rejected": -2.4120993614196777, "step": 9800 }, { "epoch": 0.39, "eval_logits/chosen": -3.0729422569274902, "eval_logits/rejected": -3.1037511825561523, "eval_logps/chosen": -0.17167852818965912, "eval_logps/rejected": -284.00799560546875, "eval_loss": 0.060443274676799774, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31488272547721863, "eval_rewards/margins": 2.794807195663452, "eval_rewards/rejected": -2.479924201965332, "eval_runtime": 2.5422, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 9800 }, { "epoch": 0.39, "learning_rate": 3.8069904011038165e-06, "logits/chosen": -3.027992010116577, "logits/rejected": -3.0611627101898193, "logps/chosen": -3.671454668045044, "logps/rejected": -284.59857177734375, "loss": 0.092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27874818444252014, "rewards/margins": 2.7699413299560547, "rewards/rejected": -2.4911930561065674, "step": 9810 }, { "epoch": 0.39, "learning_rate": 3.8040134892377702e-06, "logits/chosen": -3.018312692642212, "logits/rejected": -3.0487403869628906, "logps/chosen": -5.727156639099121, "logps/rejected": -282.79449462890625, "loss": 0.1002, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2595866620540619, "rewards/margins": 2.7378406524658203, "rewards/rejected": -2.4782540798187256, "step": 9820 }, { "epoch": 0.39, "learning_rate": 3.8010340351306997e-06, "logits/chosen": -3.0076661109924316, "logits/rejected": -3.040302276611328, "logps/chosen": -0.2762346565723419, "logps/rejected": -289.9649963378906, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.31528788805007935, "rewards/margins": 2.8593499660491943, "rewards/rejected": -2.5440621376037598, "step": 9830 }, { "epoch": 0.39, "learning_rate": 3.798052044591204e-06, "logits/chosen": -3.0367751121520996, "logits/rejected": -3.0674474239349365, "logps/chosen": -0.20267710089683533, "logps/rejected": -290.72247314453125, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.317672997713089, "rewards/margins": 2.865678071975708, "rewards/rejected": -2.5480051040649414, "step": 9840 }, { "epoch": 0.39, "learning_rate": 3.795067523432826e-06, "logits/chosen": -3.0128843784332275, "logits/rejected": -3.043696403503418, "logps/chosen": -2.532510757446289, "logps/rejected": -288.10394287109375, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": 0.2901805341243744, "rewards/margins": 2.817192554473877, "rewards/rejected": -2.5270121097564697, "step": 9850 }, { "epoch": 0.39, "learning_rate": 3.7920804774740427e-06, "logits/chosen": -3.010300397872925, "logits/rejected": -3.0431976318359375, "logps/chosen": -0.18990972638130188, "logps/rejected": -289.532958984375, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.3131141662597656, "rewards/margins": 2.855344295501709, "rewards/rejected": -2.5422303676605225, "step": 9860 }, { "epoch": 0.39, "learning_rate": 3.789090912538253e-06, "logits/chosen": -3.0305609703063965, "logits/rejected": -3.0589776039123535, "logps/chosen": -3.3982620239257812, "logps/rejected": -286.8091125488281, "loss": 0.0881, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2842569351196289, "rewards/margins": 2.794076681137085, "rewards/rejected": -2.509819507598877, "step": 9870 }, { "epoch": 0.4, "learning_rate": 3.7860988344537664e-06, "logits/chosen": -3.0157313346862793, "logits/rejected": -3.044887065887451, "logps/chosen": -2.229020118713379, "logps/rejected": -285.7537536621094, "loss": 0.0716, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2947762906551361, "rewards/margins": 2.7974133491516113, "rewards/rejected": -2.5026373863220215, "step": 9880 }, { "epoch": 0.4, "learning_rate": 3.783104249053793e-06, "logits/chosen": -3.024186611175537, "logits/rejected": -3.0542304515838623, "logps/chosen": -3.5629634857177734, "logps/rejected": -286.6007385253906, "loss": 0.0897, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2819128930568695, "rewards/margins": 2.7912135124206543, "rewards/rejected": -2.509300470352173, "step": 9890 }, { "epoch": 0.4, "learning_rate": 3.780107162176429e-06, "logits/chosen": -3.014309883117676, "logits/rejected": -3.0460591316223145, "logps/chosen": -2.8252527713775635, "logps/rejected": -284.9360046386719, "loss": 0.082, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2875838279724121, "rewards/margins": 2.786921739578247, "rewards/rejected": -2.499337911605835, "step": 9900 }, { "epoch": 0.4, "eval_logits/chosen": -3.0724425315856934, "eval_logits/rejected": -3.1028432846069336, "eval_logps/chosen": -0.16076329350471497, "eval_logps/rejected": -284.0976257324219, "eval_loss": 0.060330361127853394, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31499189138412476, "eval_rewards/margins": 2.7958121299743652, "eval_rewards/rejected": -2.480820417404175, "eval_runtime": 2.5354, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 9900 }, { "epoch": 0.4, "learning_rate": 3.77710757966465e-06, "logits/chosen": -3.0349373817443848, "logits/rejected": -3.0665225982666016, "logps/chosen": -1.781911849975586, "logps/rejected": -290.0293884277344, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": 0.3007584810256958, "rewards/margins": 2.8442134857177734, "rewards/rejected": -2.543454647064209, "step": 9910 }, { "epoch": 0.4, "learning_rate": 3.7741055073662945e-06, "logits/chosen": -3.0044333934783936, "logits/rejected": -3.0383434295654297, "logps/chosen": -6.987421989440918, "logps/rejected": -279.3161926269531, "loss": 0.1282, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24587973952293396, "rewards/margins": 2.686222553253174, "rewards/rejected": -2.440342664718628, "step": 9920 }, { "epoch": 0.4, "learning_rate": 3.7711009511340573e-06, "logits/chosen": -3.03989839553833, "logits/rejected": -3.0728797912597656, "logps/chosen": -3.4937222003936768, "logps/rejected": -284.34832763671875, "loss": 0.0905, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28317776322364807, "rewards/margins": 2.7705225944519043, "rewards/rejected": -2.487344741821289, "step": 9930 }, { "epoch": 0.4, "learning_rate": 3.7680939168254733e-06, "logits/chosen": -3.004357099533081, "logits/rejected": -3.0394554138183594, "logps/chosen": -1.017765760421753, "logps/rejected": -285.3366394042969, "loss": 0.0687, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3052360415458679, "rewards/margins": 2.81020188331604, "rewards/rejected": -2.5049657821655273, "step": 9940 }, { "epoch": 0.4, "learning_rate": 3.7650844103029093e-06, "logits/chosen": -3.01621413230896, "logits/rejected": -3.0465824604034424, "logps/chosen": -0.22176794707775116, "logps/rejected": -290.65142822265625, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150060772895813, "rewards/margins": 2.867281436920166, "rewards/rejected": -2.5522751808166504, "step": 9950 }, { "epoch": 0.4, "learning_rate": 3.762072437433555e-06, "logits/chosen": -3.027705430984497, "logits/rejected": -3.059034585952759, "logps/chosen": -0.19888144731521606, "logps/rejected": -288.45806884765625, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": 0.31518158316612244, "rewards/margins": 2.843935489654541, "rewards/rejected": -2.5287539958953857, "step": 9960 }, { "epoch": 0.4, "learning_rate": 3.7590580040894025e-06, "logits/chosen": -3.0215325355529785, "logits/rejected": -3.050990104675293, "logps/chosen": -7.298097133636475, "logps/rejected": -281.2016296386719, "loss": 0.1282, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24582402408123016, "rewards/margins": 2.6981263160705566, "rewards/rejected": -2.4523022174835205, "step": 9970 }, { "epoch": 0.4, "learning_rate": 3.7560411161472454e-06, "logits/chosen": -3.0429916381835938, "logits/rejected": -3.0729215145111084, "logps/chosen": -3.900737762451172, "logps/rejected": -287.65972900390625, "loss": 0.0915, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2777913808822632, "rewards/margins": 2.803619146347046, "rewards/rejected": -2.5258278846740723, "step": 9980 }, { "epoch": 0.4, "learning_rate": 3.7530217794886607e-06, "logits/chosen": -3.0104305744171143, "logits/rejected": -3.0439484119415283, "logps/chosen": -0.2221047580242157, "logps/rejected": -289.3544921875, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.3139277696609497, "rewards/margins": 2.85699200630188, "rewards/rejected": -2.5430641174316406, "step": 9990 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -3.0161287784576416, "logits/rejected": -3.0476582050323486, "logps/chosen": -0.20807823538780212, "logps/rejected": -290.0389709472656, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.31478315591812134, "rewards/margins": 2.8610756397247314, "rewards/rejected": -2.546292304992676, "step": 10000 }, { "epoch": 0.4, "eval_logits/chosen": -3.073479652404785, "eval_logits/rejected": -3.102341890335083, "eval_logps/chosen": -0.16106143593788147, "eval_logps/rejected": -284.51025390625, "eval_loss": 0.06009570509195328, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.314988911151886, "eval_rewards/margins": 2.7999355792999268, "eval_rewards/rejected": -2.4849467277526855, "eval_runtime": 2.5435, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 10000 }, { "epoch": 0.4, "learning_rate": 3.7469757835723777e-06, "logits/chosen": -3.032891035079956, "logits/rejected": -3.0642600059509277, "logps/chosen": -0.19700443744659424, "logps/rejected": -292.34210205078125, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.315030574798584, "rewards/margins": 2.8812127113342285, "rewards/rejected": -2.5661818981170654, "step": 10010 }, { "epoch": 0.4, "learning_rate": 3.743949136101657e-06, "logits/chosen": -3.0090365409851074, "logits/rejected": -3.0448391437530518, "logps/chosen": -0.2645478844642639, "logps/rejected": -289.3907775878906, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.31322890520095825, "rewards/margins": 2.857140064239502, "rewards/rejected": -2.5439114570617676, "step": 10020 }, { "epoch": 0.4, "learning_rate": 3.7409200634884425e-06, "logits/chosen": -3.0095396041870117, "logits/rejected": -3.0396342277526855, "logps/chosen": -0.2288634330034256, "logps/rejected": -291.07379150390625, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.31646957993507385, "rewards/margins": 2.8726248741149902, "rewards/rejected": -2.55615496635437, "step": 10030 }, { "epoch": 0.4, "learning_rate": 3.7378885716380665e-06, "logits/chosen": -3.010862350463867, "logits/rejected": -3.045457601547241, "logps/chosen": -1.2320950031280518, "logps/rejected": -290.7886657714844, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.30450278520584106, "rewards/margins": 2.8559935092926025, "rewards/rejected": -2.551490545272827, "step": 10040 }, { "epoch": 0.4, "learning_rate": 3.7348546664605777e-06, "logits/chosen": -3.017437219619751, "logits/rejected": -3.0504350662231445, "logps/chosen": -0.25469279289245605, "logps/rejected": -286.68072509765625, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": 0.31337571144104004, "rewards/margins": 2.825456380844116, "rewards/rejected": -2.512080669403076, "step": 10050 }, { "epoch": 0.4, "learning_rate": 3.731818353870729e-06, "logits/chosen": -3.0237369537353516, "logits/rejected": -3.0548949241638184, "logps/chosen": -0.21223053336143494, "logps/rejected": -291.2685852050781, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.3169258236885071, "rewards/margins": 2.871586322784424, "rewards/rejected": -2.5546603202819824, "step": 10060 }, { "epoch": 0.4, "learning_rate": 3.7287796397879678e-06, "logits/chosen": -3.000044345855713, "logits/rejected": -3.0350501537323, "logps/chosen": -0.20151862502098083, "logps/rejected": -290.20306396484375, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.31236857175827026, "rewards/margins": 2.863222599029541, "rewards/rejected": -2.550853729248047, "step": 10070 }, { "epoch": 0.4, "learning_rate": 3.725738530136422e-06, "logits/chosen": -3.020693302154541, "logits/rejected": -3.0513997077941895, "logps/chosen": -3.922048568725586, "logps/rejected": -285.34942626953125, "loss": 0.095, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2793341279029846, "rewards/margins": 2.7740719318389893, "rewards/rejected": -2.4947376251220703, "step": 10080 }, { "epoch": 0.4, "learning_rate": 3.722695030844891e-06, "logits/chosen": -3.0287017822265625, "logits/rejected": -3.060159206390381, "logps/chosen": -3.742047071456909, "logps/rejected": -286.1482849121094, "loss": 0.0914, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2808932363986969, "rewards/margins": 2.7846474647521973, "rewards/rejected": -2.503754138946533, "step": 10090 }, { "epoch": 0.4, "learning_rate": 3.7196491478468322e-06, "logits/chosen": -3.0316810607910156, "logits/rejected": -3.06079363822937, "logps/chosen": -6.743491172790527, "logps/rejected": -282.6240234375, "loss": 0.1215, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2513134479522705, "rewards/margins": 2.7214512825012207, "rewards/rejected": -2.4701380729675293, "step": 10100 }, { "epoch": 0.4, "eval_logits/chosen": -3.0728888511657715, "eval_logits/rejected": -3.1024699211120605, "eval_logps/chosen": -0.1740756779909134, "eval_logps/rejected": -284.3609619140625, "eval_loss": 0.06022780388593674, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148587644100189, "eval_rewards/margins": 2.7983126640319824, "eval_rewards/rejected": -2.4834537506103516, "eval_runtime": 2.5415, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 10100 }, { "epoch": 0.4, "learning_rate": 3.7166008870803505e-06, "logits/chosen": -3.057032823562622, "logits/rejected": -3.084669828414917, "logps/chosen": -3.313978910446167, "logps/rejected": -286.02191162109375, "loss": 0.0874, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28412094712257385, "rewards/margins": 2.788691520690918, "rewards/rejected": -2.504570722579956, "step": 10110 }, { "epoch": 0.4, "learning_rate": 3.713550254488185e-06, "logits/chosen": -2.994849681854248, "logits/rejected": -3.026200771331787, "logps/chosen": -3.438955783843994, "logps/rejected": -285.49761962890625, "loss": 0.0803, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2824697196483612, "rewards/margins": 2.7812137603759766, "rewards/rejected": -2.498743772506714, "step": 10120 }, { "epoch": 0.41, "learning_rate": 3.7104972560177022e-06, "logits/chosen": -3.046461820602417, "logits/rejected": -3.0758979320526123, "logps/chosen": -3.8233017921447754, "logps/rejected": -285.83563232421875, "loss": 0.0928, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28144609928131104, "rewards/margins": 2.7794017791748047, "rewards/rejected": -2.4979560375213623, "step": 10130 }, { "epoch": 0.41, "learning_rate": 3.707441897620877e-06, "logits/chosen": -3.038270950317383, "logits/rejected": -3.0699515342712402, "logps/chosen": -3.846540927886963, "logps/rejected": -286.45574951171875, "loss": 0.0923, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27969813346862793, "rewards/margins": 2.7910237312316895, "rewards/rejected": -2.5113255977630615, "step": 10140 }, { "epoch": 0.41, "learning_rate": 3.7043841852542884e-06, "logits/chosen": -3.060875654220581, "logits/rejected": -3.089674472808838, "logps/chosen": -4.317582130432129, "logps/rejected": -283.7353820800781, "loss": 0.0969, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27611058950424194, "rewards/margins": 2.7576868534088135, "rewards/rejected": -2.481576442718506, "step": 10150 }, { "epoch": 0.41, "learning_rate": 3.701324124879102e-06, "logits/chosen": -3.033923387527466, "logits/rejected": -3.062638759613037, "logps/chosen": -0.1713109314441681, "logps/rejected": -288.6610412597656, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": 0.31646794080734253, "rewards/margins": 2.840909481048584, "rewards/rejected": -2.5244414806365967, "step": 10160 }, { "epoch": 0.41, "learning_rate": 3.698261722461063e-06, "logits/chosen": -3.0320801734924316, "logits/rejected": -3.063093423843384, "logps/chosen": -0.15092386305332184, "logps/rejected": -284.81610107421875, "loss": 0.0596, "rewards/accuracies": 1.0, "rewards/chosen": 0.31716492772102356, "rewards/margins": 2.809715747833252, "rewards/rejected": -2.492550849914551, "step": 10170 }, { "epoch": 0.41, "learning_rate": 3.695196983970481e-06, "logits/chosen": -3.0272812843322754, "logits/rejected": -3.057556390762329, "logps/chosen": -0.19017799198627472, "logps/rejected": -287.6793518066406, "loss": 0.0608, "rewards/accuracies": 1.0, "rewards/chosen": 0.31442028284072876, "rewards/margins": 2.835440158843994, "rewards/rejected": -2.5210201740264893, "step": 10180 }, { "epoch": 0.41, "learning_rate": 3.6921299153822198e-06, "logits/chosen": -3.016956329345703, "logits/rejected": -3.047764301300049, "logps/chosen": -2.9912962913513184, "logps/rejected": -284.1965026855469, "loss": 0.0874, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28866925835609436, "rewards/margins": 2.7699806690216064, "rewards/rejected": -2.481311321258545, "step": 10190 }, { "epoch": 0.41, "learning_rate": 3.689060522675689e-06, "logits/chosen": -3.0294394493103027, "logits/rejected": -3.0612380504608154, "logps/chosen": -0.5418800115585327, "logps/rejected": -283.04046630859375, "loss": 0.0617, "rewards/accuracies": 1.0, "rewards/chosen": 0.30853167176246643, "rewards/margins": 2.7874867916107178, "rewards/rejected": -2.478955030441284, "step": 10200 }, { "epoch": 0.41, "eval_logits/chosen": -3.073859691619873, "eval_logits/rejected": -3.1022133827209473, "eval_logps/chosen": -0.13599148392677307, "eval_logps/rejected": -284.53472900390625, "eval_loss": 0.059997715055942535, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3152396082878113, "eval_rewards/margins": 2.8004310131073, "eval_rewards/rejected": -2.4851913452148438, "eval_runtime": 2.5379, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 10200 }, { "epoch": 0.41, "learning_rate": 3.685988811834823e-06, "logits/chosen": -3.011826515197754, "logits/rejected": -3.0426318645477295, "logps/chosen": -3.17081618309021, "logps/rejected": -285.8729248046875, "loss": 0.0863, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.286054790019989, "rewards/margins": 2.7902770042419434, "rewards/rejected": -2.5042226314544678, "step": 10210 }, { "epoch": 0.41, "learning_rate": 3.682914788848083e-06, "logits/chosen": -2.986309766769409, "logits/rejected": -3.0162835121154785, "logps/chosen": -3.9766852855682373, "logps/rejected": -283.0857238769531, "loss": 0.097, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2785015106201172, "rewards/margins": 2.7578721046447754, "rewards/rejected": -2.479370594024658, "step": 10220 }, { "epoch": 0.41, "learning_rate": 3.6798384597084323e-06, "logits/chosen": -3.0357322692871094, "logits/rejected": -3.0669782161712646, "logps/chosen": -0.2329815924167633, "logps/rejected": -289.1054992675781, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": 0.31268149614334106, "rewards/margins": 2.8521199226379395, "rewards/rejected": -2.539438247680664, "step": 10230 }, { "epoch": 0.41, "learning_rate": 3.6767598304133325e-06, "logits/chosen": -3.0291748046875, "logits/rejected": -3.0611329078674316, "logps/chosen": -0.18661510944366455, "logps/rejected": -290.82208251953125, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.31264957785606384, "rewards/margins": 2.8665294647216797, "rewards/rejected": -2.553880214691162, "step": 10240 }, { "epoch": 0.41, "learning_rate": 3.6736789069647273e-06, "logits/chosen": -3.0081305503845215, "logits/rejected": -3.039132595062256, "logps/chosen": -5.951282501220703, "logps/rejected": -282.86346435546875, "loss": 0.1132, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2567296028137207, "rewards/margins": 2.7317464351654053, "rewards/rejected": -2.4750168323516846, "step": 10250 }, { "epoch": 0.41, "learning_rate": 3.6705956953690364e-06, "logits/chosen": -3.023674488067627, "logits/rejected": -3.054795980453491, "logps/chosen": -0.2518368661403656, "logps/rejected": -290.05810546875, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.31356582045555115, "rewards/margins": 2.8579983711242676, "rewards/rejected": -2.5444324016571045, "step": 10260 }, { "epoch": 0.41, "learning_rate": 3.6675102016371387e-06, "logits/chosen": -3.042997121810913, "logits/rejected": -3.0749258995056152, "logps/chosen": -0.21085044741630554, "logps/rejected": -290.1106872558594, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31226781010627747, "rewards/margins": 2.8607611656188965, "rewards/rejected": -2.5484931468963623, "step": 10270 }, { "epoch": 0.41, "learning_rate": 3.6644224317843607e-06, "logits/chosen": -3.0655293464660645, "logits/rejected": -3.0920658111572266, "logps/chosen": -2.1960835456848145, "logps/rejected": -287.95928955078125, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": 0.2960115671157837, "rewards/margins": 2.8185946941375732, "rewards/rejected": -2.5225830078125, "step": 10280 }, { "epoch": 0.41, "learning_rate": 3.66133239183047e-06, "logits/chosen": -3.016150951385498, "logits/rejected": -3.048229217529297, "logps/chosen": -0.2489519566297531, "logps/rejected": -291.26641845703125, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3142727017402649, "rewards/margins": 2.8745832443237305, "rewards/rejected": -2.5603108406066895, "step": 10290 }, { "epoch": 0.41, "learning_rate": 3.658240087799655e-06, "logits/chosen": -3.0285940170288086, "logits/rejected": -3.059143543243408, "logps/chosen": -2.99505877494812, "logps/rejected": -287.3403625488281, "loss": 0.0834, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2845422923564911, "rewards/margins": 2.8104405403137207, "rewards/rejected": -2.5258982181549072, "step": 10300 }, { "epoch": 0.41, "eval_logits/chosen": -3.073899745941162, "eval_logits/rejected": -3.101701021194458, "eval_logps/chosen": -0.14250628650188446, "eval_logps/rejected": -284.70452880859375, "eval_loss": 0.05992228537797928, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151744604110718, "eval_rewards/margins": 2.8020641803741455, "eval_rewards/rejected": -2.4868898391723633, "eval_runtime": 2.5333, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.395, "step": 10300 }, { "epoch": 0.41, "learning_rate": 3.655145525720522e-06, "logits/chosen": -3.0221786499023438, "logits/rejected": -3.054999351501465, "logps/chosen": -3.1397228240966797, "logps/rejected": -286.2785339355469, "loss": 0.0834, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2846105992794037, "rewards/margins": 2.7960593700408936, "rewards/rejected": -2.511448860168457, "step": 10310 }, { "epoch": 0.41, "learning_rate": 3.6520487116260778e-06, "logits/chosen": -3.0077431201934814, "logits/rejected": -3.040616512298584, "logps/chosen": -0.15316204726696014, "logps/rejected": -289.77191162109375, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.3171514868736267, "rewards/margins": 2.855785846710205, "rewards/rejected": -2.5386340618133545, "step": 10320 }, { "epoch": 0.41, "learning_rate": 3.6489496515537204e-06, "logits/chosen": -3.001176357269287, "logits/rejected": -3.034745931625366, "logps/chosen": -2.6801304817199707, "logps/rejected": -285.05133056640625, "loss": 0.0815, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2893649935722351, "rewards/margins": 2.7925221920013428, "rewards/rejected": -2.503157138824463, "step": 10330 }, { "epoch": 0.41, "learning_rate": 3.6458483515452246e-06, "logits/chosen": -3.018536329269409, "logits/rejected": -3.0522618293762207, "logps/chosen": -0.7490688562393188, "logps/rejected": -290.36016845703125, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": 0.3092349171638489, "rewards/margins": 2.8556971549987793, "rewards/rejected": -2.546462059020996, "step": 10340 }, { "epoch": 0.41, "learning_rate": 3.642744817646736e-06, "logits/chosen": -3.0292587280273438, "logits/rejected": -3.0614914894104004, "logps/chosen": -2.6439433097839355, "logps/rejected": -289.6534118652344, "loss": 0.0757, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29156044125556946, "rewards/margins": 2.835463047027588, "rewards/rejected": -2.54390287399292, "step": 10350 }, { "epoch": 0.41, "learning_rate": 3.639639055908751e-06, "logits/chosen": -3.026121139526367, "logits/rejected": -3.0561978816986084, "logps/chosen": -3.5301883220672607, "logps/rejected": -290.2413635253906, "loss": 0.0751, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28300291299819946, "rewards/margins": 2.8263986110687256, "rewards/rejected": -2.54339599609375, "step": 10360 }, { "epoch": 0.41, "learning_rate": 3.63653107238611e-06, "logits/chosen": -3.0177791118621826, "logits/rejected": -3.0499989986419678, "logps/chosen": -3.183605432510376, "logps/rejected": -288.7574157714844, "loss": 0.0838, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2835812568664551, "rewards/margins": 2.8207192420959473, "rewards/rejected": -2.5371384620666504, "step": 10370 }, { "epoch": 0.42, "learning_rate": 3.6334208731379885e-06, "logits/chosen": -3.008058786392212, "logits/rejected": -3.0383243560791016, "logps/chosen": -6.605607032775879, "logps/rejected": -284.0404357910156, "loss": 0.1197, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25148847699165344, "rewards/margins": 2.736046075820923, "rewards/rejected": -2.484557628631592, "step": 10380 }, { "epoch": 0.42, "learning_rate": 3.630308464227877e-06, "logits/chosen": -3.0289580821990967, "logits/rejected": -3.059213161468506, "logps/chosen": -0.1813901960849762, "logps/rejected": -291.527099609375, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149797320365906, "rewards/margins": 2.874727964401245, "rewards/rejected": -2.5597481727600098, "step": 10390 }, { "epoch": 0.42, "learning_rate": 3.627193851723577e-06, "logits/chosen": -3.0145630836486816, "logits/rejected": -3.0406317710876465, "logps/chosen": -5.07248592376709, "logps/rejected": -287.0977478027344, "loss": 0.0931, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2675539255142212, "rewards/margins": 2.7851433753967285, "rewards/rejected": -2.517589569091797, "step": 10400 }, { "epoch": 0.42, "eval_logits/chosen": -3.0727384090423584, "eval_logits/rejected": -3.100797176361084, "eval_logps/chosen": -0.1699904054403305, "eval_logps/rejected": -284.15692138671875, "eval_loss": 0.06042417883872986, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31489962339401245, "eval_rewards/margins": 2.7963130474090576, "eval_rewards/rejected": -2.4814133644104004, "eval_runtime": 2.5402, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 10400 }, { "epoch": 0.42, "learning_rate": 3.624077041697185e-06, "logits/chosen": -3.0193543434143066, "logits/rejected": -3.05232572555542, "logps/chosen": -6.03057861328125, "logps/rejected": -286.47052001953125, "loss": 0.0977, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2556791603565216, "rewards/margins": 2.7642438411712646, "rewards/rejected": -2.5085644721984863, "step": 10410 }, { "epoch": 0.42, "learning_rate": 3.6209580402250816e-06, "logits/chosen": -3.0171823501586914, "logits/rejected": -3.0493972301483154, "logps/chosen": -2.2823522090911865, "logps/rejected": -289.4557800292969, "loss": 0.0631, "rewards/accuracies": 1.0, "rewards/chosen": 0.2951720356941223, "rewards/margins": 2.832364320755005, "rewards/rejected": -2.537191867828369, "step": 10420 }, { "epoch": 0.42, "learning_rate": 3.6178368533879183e-06, "logits/chosen": -3.0354695320129395, "logits/rejected": -3.0657005310058594, "logps/chosen": -2.1047897338867188, "logps/rejected": -284.7054443359375, "loss": 0.0771, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29676321148872375, "rewards/margins": 2.7914295196533203, "rewards/rejected": -2.494666337966919, "step": 10430 }, { "epoch": 0.42, "learning_rate": 3.6147134872706107e-06, "logits/chosen": -3.00418758392334, "logits/rejected": -3.0365872383117676, "logps/chosen": -4.302163124084473, "logps/rejected": -281.42279052734375, "loss": 0.0988, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2747804820537567, "rewards/margins": 2.734954833984375, "rewards/rejected": -2.460174083709717, "step": 10440 }, { "epoch": 0.42, "learning_rate": 3.611587947962319e-06, "logits/chosen": -3.021115779876709, "logits/rejected": -3.053882598876953, "logps/chosen": -0.1847100704908371, "logps/rejected": -290.83795166015625, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3175128698348999, "rewards/margins": 2.8685035705566406, "rewards/rejected": -2.550990581512451, "step": 10450 }, { "epoch": 0.42, "learning_rate": 3.608460241556443e-06, "logits/chosen": -3.0189266204833984, "logits/rejected": -3.0517289638519287, "logps/chosen": -5.140239238739014, "logps/rejected": -283.64593505859375, "loss": 0.0995, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26495909690856934, "rewards/margins": 2.747422695159912, "rewards/rejected": -2.4824633598327637, "step": 10460 }, { "epoch": 0.42, "learning_rate": 3.605330374150607e-06, "logits/chosen": -3.013087034225464, "logits/rejected": -3.042430877685547, "logps/chosen": -9.579559326171875, "logps/rejected": -281.152099609375, "loss": 0.1411, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22092008590698242, "rewards/margins": 2.682337522506714, "rewards/rejected": -2.4614176750183105, "step": 10470 }, { "epoch": 0.42, "learning_rate": 3.6021983518466468e-06, "logits/chosen": -3.038149118423462, "logits/rejected": -3.0683281421661377, "logps/chosen": -3.069535255432129, "logps/rejected": -285.7442321777344, "loss": 0.086, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28837427496910095, "rewards/margins": 2.786895751953125, "rewards/rejected": -2.498521089553833, "step": 10480 }, { "epoch": 0.42, "learning_rate": 3.5990641807506e-06, "logits/chosen": -3.023714542388916, "logits/rejected": -3.0544657707214355, "logps/chosen": -3.2025623321533203, "logps/rejected": -283.16455078125, "loss": 0.0901, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2851731479167938, "rewards/margins": 2.7627205848693848, "rewards/rejected": -2.4775474071502686, "step": 10490 }, { "epoch": 0.42, "learning_rate": 3.595927866972694e-06, "logits/chosen": -3.027055263519287, "logits/rejected": -3.0545692443847656, "logps/chosen": -0.24843326210975647, "logps/rejected": -287.94671630859375, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.3164141774177551, "rewards/margins": 2.8374462127685547, "rewards/rejected": -2.5210318565368652, "step": 10500 }, { "epoch": 0.42, "eval_logits/chosen": -3.070693016052246, "eval_logits/rejected": -3.098649024963379, "eval_logps/chosen": -0.14422574639320374, "eval_logps/rejected": -284.5180969238281, "eval_loss": 0.06000389903783798, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151572644710541, "eval_rewards/margins": 2.800182819366455, "eval_rewards/rejected": -2.485025405883789, "eval_runtime": 2.5314, "eval_samples_per_second": 1.975, "eval_steps_per_second": 0.395, "step": 10500 }, { "epoch": 0.42, "learning_rate": 3.5927894166273324e-06, "logits/chosen": -3.0499765872955322, "logits/rejected": -3.0785481929779053, "logps/chosen": -0.17492982745170593, "logps/rejected": -291.8489990234375, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.31725984811782837, "rewards/margins": 2.881492853164673, "rewards/rejected": -2.5642330646514893, "step": 10510 }, { "epoch": 0.42, "learning_rate": 3.5896488358330854e-06, "logits/chosen": -3.033432960510254, "logits/rejected": -3.063469171524048, "logps/chosen": -4.695204734802246, "logps/rejected": -286.19219970703125, "loss": 0.0896, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2724454700946808, "rewards/margins": 2.7773454189300537, "rewards/rejected": -2.5049004554748535, "step": 10520 }, { "epoch": 0.42, "learning_rate": 3.586506130712676e-06, "logits/chosen": -3.0286366939544678, "logits/rejected": -3.063830852508545, "logps/chosen": -0.20310504734516144, "logps/rejected": -288.8940124511719, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.31664347648620605, "rewards/margins": 2.8470213413238525, "rewards/rejected": -2.5303778648376465, "step": 10530 }, { "epoch": 0.42, "learning_rate": 3.5833613073929684e-06, "logits/chosen": -3.0164809226989746, "logits/rejected": -3.047337770462036, "logps/chosen": -3.1779978275299072, "logps/rejected": -285.79742431640625, "loss": 0.0872, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28544291853904724, "rewards/margins": 2.7870898246765137, "rewards/rejected": -2.5016472339630127, "step": 10540 }, { "epoch": 0.42, "learning_rate": 3.5802143720049565e-06, "logits/chosen": -3.034688711166382, "logits/rejected": -3.061648368835449, "logps/chosen": -9.865948677062988, "logps/rejected": -274.58056640625, "loss": 0.1576, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2178497314453125, "rewards/margins": 2.610787868499756, "rewards/rejected": -2.3929381370544434, "step": 10550 }, { "epoch": 0.42, "learning_rate": 3.5770653306837515e-06, "logits/chosen": -3.0280420780181885, "logits/rejected": -3.0569846630096436, "logps/chosen": -7.481184959411621, "logps/rejected": -279.7231750488281, "loss": 0.1264, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2424856424331665, "rewards/margins": 2.6866676807403564, "rewards/rejected": -2.4441819190979004, "step": 10560 }, { "epoch": 0.42, "learning_rate": 3.5739141895685708e-06, "logits/chosen": -3.0228271484375, "logits/rejected": -3.0515458583831787, "logps/chosen": -0.204255148768425, "logps/rejected": -287.14764404296875, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": 0.3118208944797516, "rewards/margins": 2.8275465965270996, "rewards/rejected": -2.515725612640381, "step": 10570 }, { "epoch": 0.42, "learning_rate": 3.570760954802726e-06, "logits/chosen": -3.0287325382232666, "logits/rejected": -3.058549165725708, "logps/chosen": -2.460175037384033, "logps/rejected": -284.99432373046875, "loss": 0.0811, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2935877740383148, "rewards/margins": 2.785219669342041, "rewards/rejected": -2.4916319847106934, "step": 10580 }, { "epoch": 0.42, "learning_rate": 3.5676056325336084e-06, "logits/chosen": -3.0113017559051514, "logits/rejected": -3.045992851257324, "logps/chosen": -0.1808568835258484, "logps/rejected": -288.191162109375, "loss": 0.0595, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138214945793152, "rewards/margins": 2.839625835418701, "rewards/rejected": -2.525804042816162, "step": 10590 }, { "epoch": 0.42, "learning_rate": 3.564448228912682e-06, "logits/chosen": -3.0244967937469482, "logits/rejected": -3.0582408905029297, "logps/chosen": -3.406099796295166, "logps/rejected": -284.90631103515625, "loss": 0.0882, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28445667028427124, "rewards/margins": 2.775338649749756, "rewards/rejected": -2.49088191986084, "step": 10600 }, { "epoch": 0.42, "eval_logits/chosen": -3.071179151535034, "eval_logits/rejected": -3.0995421409606934, "eval_logps/chosen": -0.19753366708755493, "eval_logps/rejected": -284.00946044921875, "eval_loss": 0.060553889721632004, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31462422013282776, "eval_rewards/margins": 2.794562816619873, "eval_rewards/rejected": -2.4799389839172363, "eval_runtime": 2.5388, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 10600 }, { "epoch": 0.42, "learning_rate": 3.561288750095465e-06, "logits/chosen": -3.0042595863342285, "logits/rejected": -3.0339713096618652, "logps/chosen": -5.034782886505127, "logps/rejected": -283.70758056640625, "loss": 0.0979, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26592427492141724, "rewards/margins": 2.748032808303833, "rewards/rejected": -2.4821083545684814, "step": 10610 }, { "epoch": 0.42, "learning_rate": 3.5581272022415243e-06, "logits/chosen": -2.995601177215576, "logits/rejected": -3.029047966003418, "logps/chosen": -0.2259141504764557, "logps/rejected": -289.9962158203125, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3158877491950989, "rewards/margins": 2.8605973720550537, "rewards/rejected": -2.5447096824645996, "step": 10620 }, { "epoch": 0.43, "learning_rate": 3.5549635915144578e-06, "logits/chosen": -3.018254041671753, "logits/rejected": -3.0506649017333984, "logps/chosen": -3.9161548614501953, "logps/rejected": -280.82568359375, "loss": 0.0983, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27606749534606934, "rewards/margins": 2.7322816848754883, "rewards/rejected": -2.4562137126922607, "step": 10630 }, { "epoch": 0.43, "learning_rate": 3.5517979240818875e-06, "logits/chosen": -3.027857780456543, "logits/rejected": -3.056187152862549, "logps/chosen": -0.40424132347106934, "logps/rejected": -290.5710754394531, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.31581375002861023, "rewards/margins": 2.8629238605499268, "rewards/rejected": -2.5471103191375732, "step": 10640 }, { "epoch": 0.43, "learning_rate": 3.5486302061154433e-06, "logits/chosen": -3.0114822387695312, "logits/rejected": -3.0414793491363525, "logps/chosen": -0.32548776268959045, "logps/rejected": -291.21270751953125, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3144073486328125, "rewards/margins": 2.868354320526123, "rewards/rejected": -2.5539469718933105, "step": 10650 }, { "epoch": 0.43, "learning_rate": 3.5454604437907535e-06, "logits/chosen": -2.9988651275634766, "logits/rejected": -3.031625986099243, "logps/chosen": -0.20766112208366394, "logps/rejected": -291.05029296875, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.31356820464134216, "rewards/margins": 2.867682695388794, "rewards/rejected": -2.554114580154419, "step": 10660 }, { "epoch": 0.43, "learning_rate": 3.5422886432874342e-06, "logits/chosen": -2.997727870941162, "logits/rejected": -3.028513193130493, "logps/chosen": -0.2937889099121094, "logps/rejected": -290.37225341796875, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3122673034667969, "rewards/margins": 2.8625428676605225, "rewards/rejected": -2.5502755641937256, "step": 10670 }, { "epoch": 0.43, "learning_rate": 3.53911481078907e-06, "logits/chosen": -3.051021099090576, "logits/rejected": -3.078207492828369, "logps/chosen": -1.7184807062149048, "logps/rejected": -287.57659912109375, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": 0.30019205808639526, "rewards/margins": 2.819626808166504, "rewards/rejected": -2.519434690475464, "step": 10680 }, { "epoch": 0.43, "learning_rate": 3.535938952483211e-06, "logits/chosen": -2.9944489002227783, "logits/rejected": -3.0264458656311035, "logps/chosen": -3.877232074737549, "logps/rejected": -285.70208740234375, "loss": 0.0931, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2773774564266205, "rewards/margins": 2.777222156524658, "rewards/rejected": -2.499845027923584, "step": 10690 }, { "epoch": 0.43, "learning_rate": 3.532761074561355e-06, "logits/chosen": -3.00486421585083, "logits/rejected": -3.035787343978882, "logps/chosen": -3.561161518096924, "logps/rejected": -286.80401611328125, "loss": 0.0893, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28243547677993774, "rewards/margins": 2.795241355895996, "rewards/rejected": -2.512805461883545, "step": 10700 }, { "epoch": 0.43, "eval_logits/chosen": -3.072807550430298, "eval_logits/rejected": -3.1019351482391357, "eval_logps/chosen": -0.1667693555355072, "eval_logps/rejected": -284.6656188964844, "eval_loss": 0.05991727113723755, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31493183970451355, "eval_rewards/margins": 2.8014323711395264, "eval_rewards/rejected": -2.4865005016326904, "eval_runtime": 2.5372, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 10700 }, { "epoch": 0.43, "learning_rate": 3.5295811832189374e-06, "logits/chosen": -3.026276111602783, "logits/rejected": -3.0570459365844727, "logps/chosen": -0.21526959538459778, "logps/rejected": -288.7207946777344, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.31424614787101746, "rewards/margins": 2.8492491245269775, "rewards/rejected": -2.5350029468536377, "step": 10710 }, { "epoch": 0.43, "learning_rate": 3.5263992846553203e-06, "logits/chosen": -3.0396766662597656, "logits/rejected": -3.0705761909484863, "logps/chosen": -0.19065658748149872, "logps/rejected": -288.1814880371094, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": 0.31618812680244446, "rewards/margins": 2.8441667556762695, "rewards/rejected": -2.5279788970947266, "step": 10720 }, { "epoch": 0.43, "learning_rate": 3.5232153850737772e-06, "logits/chosen": -3.022895336151123, "logits/rejected": -3.051633596420288, "logps/chosen": -0.2169325053691864, "logps/rejected": -291.5577697753906, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.31578144431114197, "rewards/margins": 2.87471342086792, "rewards/rejected": -2.558932304382324, "step": 10730 }, { "epoch": 0.43, "learning_rate": 3.5200294906814823e-06, "logits/chosen": -3.020777940750122, "logits/rejected": -3.052931070327759, "logps/chosen": -0.2188427448272705, "logps/rejected": -291.2657775878906, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31544262170791626, "rewards/margins": 2.874232769012451, "rewards/rejected": -2.558790445327759, "step": 10740 }, { "epoch": 0.43, "learning_rate": 3.516841607689501e-06, "logits/chosen": -3.0269646644592285, "logits/rejected": -3.058011770248413, "logps/chosen": -6.120855808258057, "logps/rejected": -285.661865234375, "loss": 0.1098, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25610843300819397, "rewards/margins": 2.7605717182159424, "rewards/rejected": -2.5044631958007812, "step": 10750 }, { "epoch": 0.43, "learning_rate": 3.5136517423127737e-06, "logits/chosen": -3.015918254852295, "logits/rejected": -3.048419237136841, "logps/chosen": -3.852262020111084, "logps/rejected": -286.8875732421875, "loss": 0.092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27681106328964233, "rewards/margins": 2.7933006286621094, "rewards/rejected": -2.5164895057678223, "step": 10760 }, { "epoch": 0.43, "learning_rate": 3.5104599007701057e-06, "logits/chosen": -3.033417224884033, "logits/rejected": -3.06300950050354, "logps/chosen": -3.2811362743377686, "logps/rejected": -285.10687255859375, "loss": 0.0866, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2853897213935852, "rewards/margins": 2.7790868282318115, "rewards/rejected": -2.493697166442871, "step": 10770 }, { "epoch": 0.43, "learning_rate": 3.507266089284157e-06, "logits/chosen": -3.013495445251465, "logits/rejected": -3.0452919006347656, "logps/chosen": -2.8331100940704346, "logps/rejected": -287.8431701660156, "loss": 0.0781, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28787270188331604, "rewards/margins": 2.8108887672424316, "rewards/rejected": -2.5230157375335693, "step": 10780 }, { "epoch": 0.43, "learning_rate": 3.5040703140814254e-06, "logits/chosen": -3.0207338333129883, "logits/rejected": -3.0490920543670654, "logps/chosen": -3.366196393966675, "logps/rejected": -285.4826965332031, "loss": 0.0881, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2797984182834625, "rewards/margins": 2.7837178707122803, "rewards/rejected": -2.5039196014404297, "step": 10790 }, { "epoch": 0.43, "learning_rate": 3.5008725813922383e-06, "logits/chosen": -3.029899835586548, "logits/rejected": -3.0590851306915283, "logps/chosen": -0.21079544723033905, "logps/rejected": -291.5541076660156, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.31347930431365967, "rewards/margins": 2.876638412475586, "rewards/rejected": -2.563159227371216, "step": 10800 }, { "epoch": 0.43, "eval_logits/chosen": -3.074826717376709, "eval_logits/rejected": -3.1032252311706543, "eval_logps/chosen": -0.21086397767066956, "eval_logps/rejected": -285.1605224609375, "eval_loss": 0.059653520584106445, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3144908845424652, "eval_rewards/margins": 2.805940628051758, "eval_rewards/rejected": -2.4914495944976807, "eval_runtime": 2.5383, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 10800 }, { "epoch": 0.43, "learning_rate": 3.4976728974507387e-06, "logits/chosen": -3.0333304405212402, "logits/rejected": -3.062778949737549, "logps/chosen": -3.2063326835632324, "logps/rejected": -283.0634765625, "loss": 0.0911, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2811422049999237, "rewards/margins": 2.765364170074463, "rewards/rejected": -2.484221935272217, "step": 10810 }, { "epoch": 0.43, "learning_rate": 3.494471268494875e-06, "logits/chosen": -2.9809765815734863, "logits/rejected": -3.0155282020568848, "logps/chosen": -0.17534875869750977, "logps/rejected": -288.8387451171875, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.3116900622844696, "rewards/margins": 2.8455607891082764, "rewards/rejected": -2.5338709354400635, "step": 10820 }, { "epoch": 0.43, "learning_rate": 3.4912677007663857e-06, "logits/chosen": -3.0392627716064453, "logits/rejected": -3.067262649536133, "logps/chosen": -3.1101396083831787, "logps/rejected": -285.3829650878906, "loss": 0.0867, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2875010371208191, "rewards/margins": 2.7831945419311523, "rewards/rejected": -2.4956936836242676, "step": 10830 }, { "epoch": 0.43, "learning_rate": 3.4880622005107916e-06, "logits/chosen": -3.011110782623291, "logits/rejected": -3.0406734943389893, "logps/chosen": -5.756687164306641, "logps/rejected": -284.504150390625, "loss": 0.1106, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2600354254245758, "rewards/margins": 2.750192165374756, "rewards/rejected": -2.490156888961792, "step": 10840 }, { "epoch": 0.43, "learning_rate": 3.4848547739773782e-06, "logits/chosen": -3.024691581726074, "logits/rejected": -3.0569381713867188, "logps/chosen": -0.19545194506645203, "logps/rejected": -289.2422180175781, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.31608301401138306, "rewards/margins": 2.850620985031128, "rewards/rejected": -2.5345375537872314, "step": 10850 }, { "epoch": 0.43, "learning_rate": 3.481645427419188e-06, "logits/chosen": -3.0189597606658936, "logits/rejected": -3.050621509552002, "logps/chosen": -2.4421768188476562, "logps/rejected": -288.0804138183594, "loss": 0.0755, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2911176085472107, "rewards/margins": 2.816887140274048, "rewards/rejected": -2.5257697105407715, "step": 10860 }, { "epoch": 0.43, "learning_rate": 3.4784341670930067e-06, "logits/chosen": -3.0105042457580566, "logits/rejected": -3.043752431869507, "logps/chosen": -0.1661367118358612, "logps/rejected": -292.32183837890625, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148072361946106, "rewards/margins": 2.8819961547851562, "rewards/rejected": -2.5671889781951904, "step": 10870 }, { "epoch": 0.44, "learning_rate": 3.4752209992593495e-06, "logits/chosen": -3.009399652481079, "logits/rejected": -3.0422511100769043, "logps/chosen": -3.1238551139831543, "logps/rejected": -286.77490234375, "loss": 0.0837, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28606724739074707, "rewards/margins": 2.803361415863037, "rewards/rejected": -2.517294406890869, "step": 10880 }, { "epoch": 0.44, "learning_rate": 3.4720059301824527e-06, "logits/chosen": -3.0277581214904785, "logits/rejected": -3.057008981704712, "logps/chosen": -8.409955024719238, "logps/rejected": -281.18817138671875, "loss": 0.1233, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23194298148155212, "rewards/margins": 2.6889686584472656, "rewards/rejected": -2.4570255279541016, "step": 10890 }, { "epoch": 0.44, "learning_rate": 3.4687889661302577e-06, "logits/chosen": -3.0364184379577637, "logits/rejected": -3.0668511390686035, "logps/chosen": -4.054972171783447, "logps/rejected": -284.2544860839844, "loss": 0.0875, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27614086866378784, "rewards/margins": 2.762629508972168, "rewards/rejected": -2.4864885807037354, "step": 10900 }, { "epoch": 0.44, "eval_logits/chosen": -3.0746726989746094, "eval_logits/rejected": -3.1026134490966797, "eval_logps/chosen": -0.1782694160938263, "eval_logps/rejected": -284.90277099609375, "eval_loss": 0.059764157980680466, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148168623447418, "eval_rewards/margins": 2.8036885261535645, "eval_rewards/rejected": -2.4888718128204346, "eval_runtime": 2.5318, "eval_samples_per_second": 1.975, "eval_steps_per_second": 0.395, "step": 10900 }, { "epoch": 0.44, "learning_rate": 3.4655701133744002e-06, "logits/chosen": -3.022181749343872, "logits/rejected": -3.0513808727264404, "logps/chosen": -7.350113868713379, "logps/rejected": -283.4214172363281, "loss": 0.1267, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24311070144176483, "rewards/margins": 2.724165916442871, "rewards/rejected": -2.48105525970459, "step": 10910 }, { "epoch": 0.44, "learning_rate": 3.462349378190199e-06, "logits/chosen": -3.01334547996521, "logits/rejected": -3.044795513153076, "logps/chosen": -2.1014480590820312, "logps/rejected": -290.138427734375, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/chosen": 0.2956227660179138, "rewards/margins": 2.8423290252685547, "rewards/rejected": -2.546706199645996, "step": 10920 }, { "epoch": 0.44, "learning_rate": 3.4591267668566412e-06, "logits/chosen": -3.020385265350342, "logits/rejected": -3.0542593002319336, "logps/chosen": -0.21231015026569366, "logps/rejected": -291.6000061035156, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153302073478699, "rewards/margins": 2.870304822921753, "rewards/rejected": -2.554974317550659, "step": 10930 }, { "epoch": 0.44, "learning_rate": 3.455902285656373e-06, "logits/chosen": -3.0741982460021973, "logits/rejected": -3.102668285369873, "logps/chosen": -0.1856914609670639, "logps/rejected": -287.8809509277344, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.3165756165981293, "rewards/margins": 2.837599992752075, "rewards/rejected": -2.521024227142334, "step": 10940 }, { "epoch": 0.44, "learning_rate": 3.452675940875686e-06, "logits/chosen": -3.0335278511047363, "logits/rejected": -3.063797950744629, "logps/chosen": -2.6570096015930176, "logps/rejected": -289.0628662109375, "loss": 0.0803, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2914624810218811, "rewards/margins": 2.825956344604492, "rewards/rejected": -2.534493923187256, "step": 10950 }, { "epoch": 0.44, "learning_rate": 3.4494477388045035e-06, "logits/chosen": -3.0164337158203125, "logits/rejected": -3.0470244884490967, "logps/chosen": -0.2161095142364502, "logps/rejected": -290.0185852050781, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.31355008482933044, "rewards/margins": 2.857205390930176, "rewards/rejected": -2.5436553955078125, "step": 10960 }, { "epoch": 0.44, "learning_rate": 3.4462176857363704e-06, "logits/chosen": -3.007636785507202, "logits/rejected": -3.040869951248169, "logps/chosen": -0.19804641604423523, "logps/rejected": -290.2425231933594, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.31467491388320923, "rewards/margins": 2.860837459564209, "rewards/rejected": -2.5461628437042236, "step": 10970 }, { "epoch": 0.44, "learning_rate": 3.442985787968442e-06, "logits/chosen": -2.994535446166992, "logits/rejected": -3.028352975845337, "logps/chosen": -0.18631917238235474, "logps/rejected": -291.3114929199219, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3159770965576172, "rewards/margins": 2.8676934242248535, "rewards/rejected": -2.5517163276672363, "step": 10980 }, { "epoch": 0.44, "learning_rate": 3.439752051801467e-06, "logits/chosen": -3.0306906700134277, "logits/rejected": -3.0597147941589355, "logps/chosen": -3.8230316638946533, "logps/rejected": -288.9662170410156, "loss": 0.0906, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.278148353099823, "rewards/margins": 2.81341290473938, "rewards/rejected": -2.535264492034912, "step": 10990 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -3.0154671669006348, "logits/rejected": -3.0473685264587402, "logps/chosen": -3.7063117027282715, "logps/rejected": -287.6932067871094, "loss": 0.0902, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27852994203567505, "rewards/margins": 2.803219795227051, "rewards/rejected": -2.5246899127960205, "step": 11000 }, { "epoch": 0.44, "eval_logits/chosen": -3.073409080505371, "eval_logits/rejected": -3.102830410003662, "eval_logps/chosen": -0.15951821208000183, "eval_logps/rejected": -284.93402099609375, "eval_loss": 0.05972428247332573, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150043487548828, "eval_rewards/margins": 2.8041887283325195, "eval_rewards/rejected": -2.489184617996216, "eval_runtime": 2.5377, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 11000 }, { "epoch": 0.44, "learning_rate": 3.4332790894912877e-06, "logits/chosen": -3.031093120574951, "logits/rejected": -3.059837579727173, "logps/chosen": -6.025290012359619, "logps/rejected": -282.24176025390625, "loss": 0.1152, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25695911049842834, "rewards/margins": 2.7246487140655518, "rewards/rejected": -2.4676897525787354, "step": 11010 }, { "epoch": 0.44, "learning_rate": 3.430039875967454e-06, "logits/chosen": -3.036600112915039, "logits/rejected": -3.0660316944122314, "logps/chosen": -0.18946175277233124, "logps/rejected": -289.53350830078125, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": 0.31446561217308044, "rewards/margins": 2.851633071899414, "rewards/rejected": -2.5371670722961426, "step": 11020 }, { "epoch": 0.44, "learning_rate": 3.4267988492832913e-06, "logits/chosen": -3.00632905960083, "logits/rejected": -3.0414726734161377, "logps/chosen": -6.843052864074707, "logps/rejected": -281.4859313964844, "loss": 0.1246, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24943506717681885, "rewards/margins": 2.7101056575775146, "rewards/rejected": -2.4606704711914062, "step": 11030 }, { "epoch": 0.44, "learning_rate": 3.423556015757349e-06, "logits/chosen": -3.0119423866271973, "logits/rejected": -3.0433616638183594, "logps/chosen": -0.21078309416770935, "logps/rejected": -290.42132568359375, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.31499573588371277, "rewards/margins": 2.861760377883911, "rewards/rejected": -2.546764850616455, "step": 11040 }, { "epoch": 0.44, "learning_rate": 3.4203113817116955e-06, "logits/chosen": -3.014396905899048, "logits/rejected": -3.0508453845977783, "logps/chosen": -0.17261342704296112, "logps/rejected": -291.680908203125, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.31556907296180725, "rewards/margins": 2.876316547393799, "rewards/rejected": -2.5607476234436035, "step": 11050 }, { "epoch": 0.44, "learning_rate": 3.417064953471911e-06, "logits/chosen": -2.995543956756592, "logits/rejected": -3.029461622238159, "logps/chosen": -0.2109740674495697, "logps/rejected": -290.75653076171875, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3108169436454773, "rewards/margins": 2.8736910820007324, "rewards/rejected": -2.5628740787506104, "step": 11060 }, { "epoch": 0.44, "learning_rate": 3.4138167373670726e-06, "logits/chosen": -3.0179085731506348, "logits/rejected": -3.0506746768951416, "logps/chosen": -1.1775410175323486, "logps/rejected": -288.61297607421875, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": 0.30549782514572144, "rewards/margins": 2.8371200561523438, "rewards/rejected": -2.5316219329833984, "step": 11070 }, { "epoch": 0.44, "learning_rate": 3.410566739729746e-06, "logits/chosen": -3.0148215293884277, "logits/rejected": -3.047804117202759, "logps/chosen": -0.1976940780878067, "logps/rejected": -290.9090270996094, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150814175605774, "rewards/margins": 2.8675596714019775, "rewards/rejected": -2.552478313446045, "step": 11080 }, { "epoch": 0.44, "learning_rate": 3.407314966895966e-06, "logits/chosen": -3.0407631397247314, "logits/rejected": -3.0714457035064697, "logps/chosen": -0.17573638260364532, "logps/rejected": -291.5199279785156, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3183271586894989, "rewards/margins": 2.8751254081726074, "rewards/rejected": -2.556797981262207, "step": 11090 }, { "epoch": 0.44, "learning_rate": 3.4040614252052305e-06, "logits/chosen": -2.9882702827453613, "logits/rejected": -3.0221023559570312, "logps/chosen": -0.2827785015106201, "logps/rejected": -287.18463134765625, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": 0.31477364897727966, "rewards/margins": 2.8306941986083984, "rewards/rejected": -2.515920877456665, "step": 11100 }, { "epoch": 0.44, "eval_logits/chosen": -3.0740020275115967, "eval_logits/rejected": -3.1034419536590576, "eval_logps/chosen": -0.13424880802631378, "eval_logps/rejected": -284.68341064453125, "eval_loss": 0.059975408017635345, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3152570426464081, "eval_rewards/margins": 2.8019354343414307, "eval_rewards/rejected": -2.4866783618927, "eval_runtime": 2.539, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 11100 }, { "epoch": 0.44, "learning_rate": 3.4008061210004872e-06, "logits/chosen": -2.9986824989318848, "logits/rejected": -3.0337634086608887, "logps/chosen": -0.6783939599990845, "logps/rejected": -286.3694763183594, "loss": 0.0652, "rewards/accuracies": 1.0, "rewards/chosen": 0.31015461683273315, "rewards/margins": 2.817589044570923, "rewards/rejected": -2.507434368133545, "step": 11110 }, { "epoch": 0.44, "learning_rate": 3.3975490606281158e-06, "logits/chosen": -3.0047965049743652, "logits/rejected": -3.036348819732666, "logps/chosen": -6.453631401062012, "logps/rejected": -274.9714050292969, "loss": 0.1295, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.253856897354126, "rewards/margins": 2.6467857360839844, "rewards/rejected": -2.3929295539855957, "step": 11120 }, { "epoch": 0.45, "learning_rate": 3.394290250437924e-06, "logits/chosen": -3.039635181427002, "logits/rejected": -3.0691542625427246, "logps/chosen": -0.1705157607793808, "logps/rejected": -288.6853332519531, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": 0.31746283173561096, "rewards/margins": 2.848637104034424, "rewards/rejected": -2.531174659729004, "step": 11130 }, { "epoch": 0.45, "learning_rate": 3.391029696783127e-06, "logits/chosen": -2.982264995574951, "logits/rejected": -3.0113396644592285, "logps/chosen": -3.855074405670166, "logps/rejected": -287.44732666015625, "loss": 0.0922, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27626103162765503, "rewards/margins": 2.799611806869507, "rewards/rejected": -2.523350954055786, "step": 11140 }, { "epoch": 0.45, "learning_rate": 3.387767406020343e-06, "logits/chosen": -3.001302480697632, "logits/rejected": -3.0336928367614746, "logps/chosen": -0.20714060962200165, "logps/rejected": -291.36004638671875, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.31248369812965393, "rewards/margins": 2.871558666229248, "rewards/rejected": -2.559074878692627, "step": 11150 }, { "epoch": 0.45, "learning_rate": 3.3845033845095737e-06, "logits/chosen": -3.0327541828155518, "logits/rejected": -3.059852123260498, "logps/chosen": -0.2096884697675705, "logps/rejected": -289.0070495605469, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.316195011138916, "rewards/margins": 2.8505139350891113, "rewards/rejected": -2.5343189239501953, "step": 11160 }, { "epoch": 0.45, "learning_rate": 3.3812376386141966e-06, "logits/chosen": -3.0242373943328857, "logits/rejected": -3.054611921310425, "logps/chosen": -0.27567917108535767, "logps/rejected": -287.716796875, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": 0.3136114478111267, "rewards/margins": 2.8386611938476562, "rewards/rejected": -2.5250496864318848, "step": 11170 }, { "epoch": 0.45, "learning_rate": 3.3779701747009504e-06, "logits/chosen": -3.0076723098754883, "logits/rejected": -3.038818836212158, "logps/chosen": -3.9977920055389404, "logps/rejected": -284.97601318359375, "loss": 0.0919, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.272957444190979, "rewards/margins": 2.774411916732788, "rewards/rejected": -2.5014541149139404, "step": 11180 }, { "epoch": 0.45, "learning_rate": 3.3747009991399226e-06, "logits/chosen": -3.062875270843506, "logits/rejected": -3.092142105102539, "logps/chosen": -0.3205454349517822, "logps/rejected": -291.9322814941406, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31584516167640686, "rewards/margins": 2.8784162998199463, "rewards/rejected": -2.5625710487365723, "step": 11190 }, { "epoch": 0.45, "learning_rate": 3.3714301183045382e-06, "logits/chosen": -3.039712905883789, "logits/rejected": -3.068547010421753, "logps/chosen": -7.253874778747559, "logps/rejected": -284.1923522949219, "loss": 0.1255, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24244895577430725, "rewards/margins": 2.7324299812316895, "rewards/rejected": -2.489980697631836, "step": 11200 }, { "epoch": 0.45, "eval_logits/chosen": -3.0746049880981445, "eval_logits/rejected": -3.1023612022399902, "eval_logps/chosen": -0.12749890983104706, "eval_logps/rejected": -284.92694091796875, "eval_loss": 0.059802424162626266, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3153245449066162, "eval_rewards/margins": 2.8044381141662598, "eval_rewards/rejected": -2.4891133308410645, "eval_runtime": 2.5404, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 11200 }, { "epoch": 0.45, "learning_rate": 3.3681575385715475e-06, "logits/chosen": -3.029541015625, "logits/rejected": -3.0607542991638184, "logps/chosen": -2.2014102935791016, "logps/rejected": -290.10980224609375, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": 0.2966390550136566, "rewards/margins": 2.8410263061523438, "rewards/rejected": -2.544387102127075, "step": 11210 }, { "epoch": 0.45, "learning_rate": 3.364883266321012e-06, "logits/chosen": -3.026121139526367, "logits/rejected": -3.0559444427490234, "logps/chosen": -6.349219799041748, "logps/rejected": -282.33465576171875, "loss": 0.1188, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25564926862716675, "rewards/margins": 2.720606565475464, "rewards/rejected": -2.4649577140808105, "step": 11220 }, { "epoch": 0.45, "learning_rate": 3.3616073079362925e-06, "logits/chosen": -3.029383420944214, "logits/rejected": -3.057013988494873, "logps/chosen": -7.423121452331543, "logps/rejected": -280.713134765625, "loss": 0.1318, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24399515986442566, "rewards/margins": 2.695363998413086, "rewards/rejected": -2.451368808746338, "step": 11230 }, { "epoch": 0.45, "learning_rate": 3.3583296698040384e-06, "logits/chosen": -3.0358474254608154, "logits/rejected": -3.0638651847839355, "logps/chosen": -12.242341995239258, "logps/rejected": -277.94622802734375, "loss": 0.163, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.19382353127002716, "rewards/margins": 2.6220812797546387, "rewards/rejected": -2.428257465362549, "step": 11240 }, { "epoch": 0.45, "learning_rate": 3.3550503583141726e-06, "logits/chosen": -3.055063486099243, "logits/rejected": -3.0842278003692627, "logps/chosen": -1.0170154571533203, "logps/rejected": -289.669189453125, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.30601876974105835, "rewards/margins": 2.8520801067352295, "rewards/rejected": -2.5460615158081055, "step": 11250 }, { "epoch": 0.45, "learning_rate": 3.35176937985988e-06, "logits/chosen": -3.0223395824432373, "logits/rejected": -3.052320957183838, "logps/chosen": -3.4302585124969482, "logps/rejected": -287.0420227050781, "loss": 0.0881, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2826538383960724, "rewards/margins": 2.8002724647521973, "rewards/rejected": -2.5176186561584473, "step": 11260 }, { "epoch": 0.45, "learning_rate": 3.3484867408375953e-06, "logits/chosen": -3.0284173488616943, "logits/rejected": -3.058715343475342, "logps/chosen": -1.1961935758590698, "logps/rejected": -285.80987548828125, "loss": 0.0701, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3059315085411072, "rewards/margins": 2.808253049850464, "rewards/rejected": -2.502321481704712, "step": 11270 }, { "epoch": 0.45, "learning_rate": 3.3452024476469937e-06, "logits/chosen": -3.006213665008545, "logits/rejected": -3.037385940551758, "logps/chosen": -2.94296932220459, "logps/rejected": -287.990966796875, "loss": 0.0833, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28806063532829285, "rewards/margins": 2.810873508453369, "rewards/rejected": -2.522812843322754, "step": 11280 }, { "epoch": 0.45, "learning_rate": 3.341916506690971e-06, "logits/chosen": -3.047236919403076, "logits/rejected": -3.0794239044189453, "logps/chosen": -2.389087677001953, "logps/rejected": -287.79132080078125, "loss": 0.0786, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29310616850852966, "rewards/margins": 2.8167898654937744, "rewards/rejected": -2.523683786392212, "step": 11290 }, { "epoch": 0.45, "learning_rate": 3.338628924375638e-06, "logits/chosen": -3.0255045890808105, "logits/rejected": -3.0575926303863525, "logps/chosen": -0.216496080160141, "logps/rejected": -289.72833251953125, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.3128843903541565, "rewards/margins": 2.8562004566192627, "rewards/rejected": -2.5433156490325928, "step": 11300 }, { "epoch": 0.45, "eval_logits/chosen": -3.0735976696014404, "eval_logits/rejected": -3.1028239727020264, "eval_logps/chosen": -0.11362417787313461, "eval_logps/rejected": -284.56158447265625, "eval_loss": 0.06004374101758003, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31546327471733093, "eval_rewards/margins": 2.8009231090545654, "eval_rewards/rejected": -2.485459804534912, "eval_runtime": 2.5428, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 11300 }, { "epoch": 0.45, "learning_rate": 3.3353397071103042e-06, "logits/chosen": -3.034010410308838, "logits/rejected": -3.0621609687805176, "logps/chosen": -6.920967102050781, "logps/rejected": -279.99603271484375, "loss": 0.13, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24636629223823547, "rewards/margins": 2.690990686416626, "rewards/rejected": -2.444624185562134, "step": 11310 }, { "epoch": 0.45, "learning_rate": 3.332048861307467e-06, "logits/chosen": -3.039285182952881, "logits/rejected": -3.071018695831299, "logps/chosen": -0.1468442678451538, "logps/rejected": -287.1830749511719, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": 0.3168349862098694, "rewards/margins": 2.8347582817077637, "rewards/rejected": -2.517923355102539, "step": 11320 }, { "epoch": 0.45, "learning_rate": 3.3287563933827995e-06, "logits/chosen": -3.0226337909698486, "logits/rejected": -3.0491573810577393, "logps/chosen": -3.8490188121795654, "logps/rejected": -288.05438232421875, "loss": 0.0909, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27878883481025696, "rewards/margins": 2.802449941635132, "rewards/rejected": -2.5236611366271973, "step": 11330 }, { "epoch": 0.45, "learning_rate": 3.3254623097551343e-06, "logits/chosen": -3.018528938293457, "logits/rejected": -3.047792434692383, "logps/chosen": -0.7344974875450134, "logps/rejected": -288.7174987792969, "loss": 0.0592, "rewards/accuracies": 1.0, "rewards/chosen": 0.30991190671920776, "rewards/margins": 2.8385283946990967, "rewards/rejected": -2.5286171436309814, "step": 11340 }, { "epoch": 0.45, "learning_rate": 3.3221666168464584e-06, "logits/chosen": -3.0283827781677246, "logits/rejected": -3.054654598236084, "logps/chosen": -7.220911979675293, "logps/rejected": -279.46185302734375, "loss": 0.1282, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24442636966705322, "rewards/margins": 2.6854143142700195, "rewards/rejected": -2.4409878253936768, "step": 11350 }, { "epoch": 0.45, "learning_rate": 3.3188693210818925e-06, "logits/chosen": -3.0228641033172607, "logits/rejected": -3.0543417930603027, "logps/chosen": -0.20556649565696716, "logps/rejected": -290.4394836425781, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152623474597931, "rewards/margins": 2.867230176925659, "rewards/rejected": -2.5519676208496094, "step": 11360 }, { "epoch": 0.45, "learning_rate": 3.315570428889684e-06, "logits/chosen": -3.0424137115478516, "logits/rejected": -3.070258855819702, "logps/chosen": -0.35057568550109863, "logps/rejected": -287.68157958984375, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": 0.31291699409484863, "rewards/margins": 2.833805561065674, "rewards/rejected": -2.520888566970825, "step": 11370 }, { "epoch": 0.46, "learning_rate": 3.3122699467011913e-06, "logits/chosen": -3.0364410877227783, "logits/rejected": -3.0624115467071533, "logps/chosen": -9.07752799987793, "logps/rejected": -280.6418151855469, "loss": 0.1443, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2281256914138794, "rewards/margins": 2.6813321113586426, "rewards/rejected": -2.4532065391540527, "step": 11380 }, { "epoch": 0.46, "learning_rate": 3.308967880950874e-06, "logits/chosen": -3.0327236652374268, "logits/rejected": -3.0613198280334473, "logps/chosen": -0.3225216567516327, "logps/rejected": -290.8398132324219, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3128299117088318, "rewards/margins": 2.866520643234253, "rewards/rejected": -2.5536906719207764, "step": 11390 }, { "epoch": 0.46, "learning_rate": 3.3056642380762783e-06, "logits/chosen": -3.0227253437042236, "logits/rejected": -3.0537221431732178, "logps/chosen": -0.22363857924938202, "logps/rejected": -291.2665710449219, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152697682380676, "rewards/margins": 2.8731985092163086, "rewards/rejected": -2.5579288005828857, "step": 11400 }, { "epoch": 0.46, "eval_logits/chosen": -3.072228193283081, "eval_logits/rejected": -3.0997354984283447, "eval_logps/chosen": -0.14960050582885742, "eval_logps/rejected": -284.8240051269531, "eval_loss": 0.059888459742069244, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31510353088378906, "eval_rewards/margins": 2.803187847137451, "eval_rewards/rejected": -2.488084316253662, "eval_runtime": 2.5344, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 11400 }, { "epoch": 0.46, "learning_rate": 3.3023590245180237e-06, "logits/chosen": -3.026979923248291, "logits/rejected": -3.0569822788238525, "logps/chosen": -0.16776932775974274, "logps/rejected": -289.9206848144531, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.3154517412185669, "rewards/margins": 2.858287811279297, "rewards/rejected": -2.5428359508514404, "step": 11410 }, { "epoch": 0.46, "learning_rate": 3.299052246719795e-06, "logits/chosen": -2.9869046211242676, "logits/rejected": -3.0190436840057373, "logps/chosen": -0.31483447551727295, "logps/rejected": -287.74554443359375, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": 0.3141240179538727, "rewards/margins": 2.8344528675079346, "rewards/rejected": -2.520328998565674, "step": 11420 }, { "epoch": 0.46, "learning_rate": 3.295743911128324e-06, "logits/chosen": -3.043656826019287, "logits/rejected": -3.0720956325531006, "logps/chosen": -3.250737428665161, "logps/rejected": -289.27996826171875, "loss": 0.0856, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28717365860939026, "rewards/margins": 2.820925235748291, "rewards/rejected": -2.5337517261505127, "step": 11430 }, { "epoch": 0.46, "learning_rate": 3.29243402419338e-06, "logits/chosen": -3.014308214187622, "logits/rejected": -3.042600154876709, "logps/chosen": -0.20152434706687927, "logps/rejected": -292.3260192871094, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": 0.31571996212005615, "rewards/margins": 2.886486768722534, "rewards/rejected": -2.5707669258117676, "step": 11440 }, { "epoch": 0.46, "learning_rate": 3.2891225923677565e-06, "logits/chosen": -3.0321967601776123, "logits/rejected": -3.062455177307129, "logps/chosen": -0.225734144449234, "logps/rejected": -291.3989562988281, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.31544771790504456, "rewards/margins": 2.8720245361328125, "rewards/rejected": -2.5565762519836426, "step": 11450 }, { "epoch": 0.46, "learning_rate": 3.2858096221072605e-06, "logits/chosen": -3.0325112342834473, "logits/rejected": -3.0641462802886963, "logps/chosen": -4.308900833129883, "logps/rejected": -283.25787353515625, "loss": 0.0839, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27265554666519165, "rewards/margins": 2.752744436264038, "rewards/rejected": -2.480088949203491, "step": 11460 }, { "epoch": 0.46, "learning_rate": 3.2824951198706958e-06, "logits/chosen": -3.0081348419189453, "logits/rejected": -3.0377771854400635, "logps/chosen": -3.824134349822998, "logps/rejected": -288.0374450683594, "loss": 0.0915, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27922379970550537, "rewards/margins": 2.80488920211792, "rewards/rejected": -2.525665760040283, "step": 11470 }, { "epoch": 0.46, "learning_rate": 3.2791790921198546e-06, "logits/chosen": -3.039952278137207, "logits/rejected": -3.065901279449463, "logps/chosen": -13.549324035644531, "logps/rejected": -273.07977294921875, "loss": 0.1829, "rewards/accuracies": 0.9375, "rewards/chosen": 0.18121695518493652, "rewards/margins": 2.562190532684326, "rewards/rejected": -2.3809738159179688, "step": 11480 }, { "epoch": 0.46, "learning_rate": 3.275861545319504e-06, "logits/chosen": -3.0306763648986816, "logits/rejected": -3.0589756965637207, "logps/chosen": -4.933638572692871, "logps/rejected": -285.621826171875, "loss": 0.0941, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26684778928756714, "rewards/margins": 2.7688746452331543, "rewards/rejected": -2.5020267963409424, "step": 11490 }, { "epoch": 0.46, "learning_rate": 3.272542485937369e-06, "logits/chosen": -3.022120237350464, "logits/rejected": -3.0547587871551514, "logps/chosen": -0.19600814580917358, "logps/rejected": -293.09710693359375, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.3177993893623352, "rewards/margins": 2.8880386352539062, "rewards/rejected": -2.5702390670776367, "step": 11500 }, { "epoch": 0.46, "eval_logits/chosen": -3.0739831924438477, "eval_logits/rejected": -3.102828025817871, "eval_logps/chosen": -0.136082261800766, "eval_logps/rejected": -284.1210021972656, "eval_loss": 0.06037868186831474, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31523871421813965, "eval_rewards/margins": 2.796292781829834, "eval_rewards/rejected": -2.4810540676116943, "eval_runtime": 2.5429, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 11500 }, { "epoch": 0.46, "learning_rate": 3.269221920444127e-06, "logits/chosen": -3.0334362983703613, "logits/rejected": -3.06463623046875, "logps/chosen": -0.19515326619148254, "logps/rejected": -292.1339111328125, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.3158779740333557, "rewards/margins": 2.8834729194641113, "rewards/rejected": -2.5675950050354004, "step": 11510 }, { "epoch": 0.46, "learning_rate": 3.26589985531339e-06, "logits/chosen": -3.023390293121338, "logits/rejected": -3.055325984954834, "logps/chosen": -0.1894482672214508, "logps/rejected": -289.8715515136719, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.313986599445343, "rewards/margins": 2.858468770980835, "rewards/rejected": -2.5444817543029785, "step": 11520 }, { "epoch": 0.46, "learning_rate": 3.2625762970216944e-06, "logits/chosen": -3.0470967292785645, "logits/rejected": -3.0748908519744873, "logps/chosen": -2.5210907459259033, "logps/rejected": -284.75018310546875, "loss": 0.0826, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28974252939224243, "rewards/margins": 2.7841649055480957, "rewards/rejected": -2.494422435760498, "step": 11530 }, { "epoch": 0.46, "learning_rate": 3.259251252048486e-06, "logits/chosen": -3.010385513305664, "logits/rejected": -3.041598081588745, "logps/chosen": -0.2234264314174652, "logps/rejected": -286.5186462402344, "loss": 0.0629, "rewards/accuracies": 1.0, "rewards/chosen": 0.3164612948894501, "rewards/margins": 2.8241896629333496, "rewards/rejected": -2.507728338241577, "step": 11540 }, { "epoch": 0.46, "learning_rate": 3.2559247268761117e-06, "logits/chosen": -3.0329082012176514, "logits/rejected": -3.0628931522369385, "logps/chosen": -0.19933222234249115, "logps/rejected": -289.4264221191406, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": 0.31515055894851685, "rewards/margins": 2.852552890777588, "rewards/rejected": -2.5374021530151367, "step": 11550 }, { "epoch": 0.46, "learning_rate": 3.2525967279898017e-06, "logits/chosen": -3.0262725353240967, "logits/rejected": -3.05673885345459, "logps/chosen": -0.21500329673290253, "logps/rejected": -289.3778381347656, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": 0.31429409980773926, "rewards/margins": 2.8528335094451904, "rewards/rejected": -2.5385396480560303, "step": 11560 }, { "epoch": 0.46, "learning_rate": 3.24926726187766e-06, "logits/chosen": -3.0211710929870605, "logits/rejected": -3.0513129234313965, "logps/chosen": -0.2081928700208664, "logps/rejected": -291.1797790527344, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.3136049211025238, "rewards/margins": 2.869919538497925, "rewards/rejected": -2.556314468383789, "step": 11570 }, { "epoch": 0.46, "learning_rate": 3.245936335030651e-06, "logits/chosen": -3.0448696613311768, "logits/rejected": -3.0758023262023926, "logps/chosen": -0.2510937452316284, "logps/rejected": -291.8086242675781, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138841390609741, "rewards/margins": 2.8776824474334717, "rewards/rejected": -2.563798427581787, "step": 11580 }, { "epoch": 0.46, "learning_rate": 3.2426039539425875e-06, "logits/chosen": -3.0356357097625732, "logits/rejected": -3.0636649131774902, "logps/chosen": -4.909475803375244, "logps/rejected": -283.94403076171875, "loss": 0.103, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2691786289215088, "rewards/margins": 2.7508482933044434, "rewards/rejected": -2.4816699028015137, "step": 11590 }, { "epoch": 0.46, "learning_rate": 3.2392701251101172e-06, "logits/chosen": -2.9914207458496094, "logits/rejected": -3.02219820022583, "logps/chosen": -0.23224100470542908, "logps/rejected": -289.1014099121094, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.312468945980072, "rewards/margins": 2.8507120609283447, "rewards/rejected": -2.538243532180786, "step": 11600 }, { "epoch": 0.46, "eval_logits/chosen": -3.076162099838257, "eval_logits/rejected": -3.1031477451324463, "eval_logps/chosen": -0.18919691443443298, "eval_logps/rejected": -285.038818359375, "eval_loss": 0.059760719537734985, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31470757722854614, "eval_rewards/margins": 2.8049397468566895, "eval_rewards/rejected": -2.490232467651367, "eval_runtime": 2.5308, "eval_samples_per_second": 1.976, "eval_steps_per_second": 0.395, "step": 11600 }, { "epoch": 0.46, "learning_rate": 3.235934855032709e-06, "logits/chosen": -3.014233112335205, "logits/rejected": -3.0435471534729004, "logps/chosen": -3.9775843620300293, "logps/rejected": -286.94305419921875, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27774134278297424, "rewards/margins": 2.7920761108398438, "rewards/rejected": -2.514334201812744, "step": 11610 }, { "epoch": 0.46, "learning_rate": 3.2325981502126434e-06, "logits/chosen": -3.002807140350342, "logits/rejected": -3.0311179161071777, "logps/chosen": -9.781892776489258, "logps/rejected": -280.14013671875, "loss": 0.1514, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21942615509033203, "rewards/margins": 2.6699249744415283, "rewards/rejected": -2.4504988193511963, "step": 11620 }, { "epoch": 0.47, "learning_rate": 3.2292600171549976e-06, "logits/chosen": -3.0277488231658936, "logits/rejected": -3.058452844619751, "logps/chosen": -3.3329415321350098, "logps/rejected": -289.04425048828125, "loss": 0.0781, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2840758264064789, "rewards/margins": 2.81640625, "rewards/rejected": -2.53233003616333, "step": 11630 }, { "epoch": 0.47, "learning_rate": 3.225920462367632e-06, "logits/chosen": -3.0099425315856934, "logits/rejected": -3.0378122329711914, "logps/chosen": -6.3656907081604, "logps/rejected": -281.6272888183594, "loss": 0.1146, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2537187933921814, "rewards/margins": 2.714641571044922, "rewards/rejected": -2.4609227180480957, "step": 11640 }, { "epoch": 0.47, "learning_rate": 3.222579492361179e-06, "logits/chosen": -3.017521858215332, "logits/rejected": -3.0473549365997314, "logps/chosen": -0.21333524584770203, "logps/rejected": -291.1153259277344, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.31373605132102966, "rewards/margins": 2.8734753131866455, "rewards/rejected": -2.559739112854004, "step": 11650 }, { "epoch": 0.47, "learning_rate": 3.2192371136490325e-06, "logits/chosen": -3.0375499725341797, "logits/rejected": -3.0654232501983643, "logps/chosen": -0.16982993483543396, "logps/rejected": -291.1163024902344, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3147880434989929, "rewards/margins": 2.8690857887268066, "rewards/rejected": -2.554297924041748, "step": 11660 }, { "epoch": 0.47, "learning_rate": 3.2158933327473286e-06, "logits/chosen": -2.9866504669189453, "logits/rejected": -3.019932508468628, "logps/chosen": -0.1913146674633026, "logps/rejected": -291.17999267578125, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31269100308418274, "rewards/margins": 2.872335910797119, "rewards/rejected": -2.559645175933838, "step": 11670 }, { "epoch": 0.47, "learning_rate": 3.2125481561749406e-06, "logits/chosen": -3.0069453716278076, "logits/rejected": -3.0376434326171875, "logps/chosen": -0.21584686636924744, "logps/rejected": -291.65374755859375, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31403541564941406, "rewards/margins": 2.873595714569092, "rewards/rejected": -2.5595602989196777, "step": 11680 }, { "epoch": 0.47, "learning_rate": 3.2092015904534614e-06, "logits/chosen": -3.015669822692871, "logits/rejected": -3.04537296295166, "logps/chosen": -0.19910436868667603, "logps/rejected": -289.6183166503906, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.31561368703842163, "rewards/margins": 2.855480670928955, "rewards/rejected": -2.5398669242858887, "step": 11690 }, { "epoch": 0.47, "learning_rate": 3.205853642107192e-06, "logits/chosen": -2.9911255836486816, "logits/rejected": -3.0217320919036865, "logps/chosen": -0.18146055936813354, "logps/rejected": -290.02020263671875, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.31402844190597534, "rewards/margins": 2.8611388206481934, "rewards/rejected": -2.5471103191375732, "step": 11700 }, { "epoch": 0.47, "eval_logits/chosen": -3.073298692703247, "eval_logits/rejected": -3.100130796432495, "eval_logps/chosen": -0.17550702393054962, "eval_logps/rejected": -284.4625244140625, "eval_loss": 0.06022083759307861, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31484442949295044, "eval_rewards/margins": 2.799314022064209, "eval_rewards/rejected": -2.484469175338745, "eval_runtime": 2.533, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.395, "step": 11700 }, { "epoch": 0.47, "learning_rate": 3.2025043176631283e-06, "logits/chosen": -3.0496985912323, "logits/rejected": -3.0752358436584473, "logps/chosen": -11.798986434936523, "logps/rejected": -281.5522155761719, "loss": 0.1554, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.1963755339384079, "rewards/margins": 2.659810781478882, "rewards/rejected": -2.463435411453247, "step": 11710 }, { "epoch": 0.47, "learning_rate": 3.19915362365095e-06, "logits/chosen": -3.0164847373962402, "logits/rejected": -3.0456900596618652, "logps/chosen": -6.730201721191406, "logps/rejected": -279.3406982421875, "loss": 0.1167, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2502429485321045, "rewards/margins": 2.689094066619873, "rewards/rejected": -2.4388511180877686, "step": 11720 }, { "epoch": 0.47, "learning_rate": 3.1958015666030073e-06, "logits/chosen": -3.0093095302581787, "logits/rejected": -3.042585611343384, "logps/chosen": -0.3451077938079834, "logps/rejected": -290.71661376953125, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.31253814697265625, "rewards/margins": 2.8624255657196045, "rewards/rejected": -2.5498876571655273, "step": 11730 }, { "epoch": 0.47, "learning_rate": 3.192448153054306e-06, "logits/chosen": -3.0012660026550293, "logits/rejected": -3.033997058868408, "logps/chosen": -0.18152721226215363, "logps/rejected": -291.81951904296875, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.31595760583877563, "rewards/margins": 2.875988483428955, "rewards/rejected": -2.560030937194824, "step": 11740 }, { "epoch": 0.47, "learning_rate": 3.189093389542498e-06, "logits/chosen": -3.0230283737182617, "logits/rejected": -3.052605628967285, "logps/chosen": -0.1551436483860016, "logps/rejected": -289.3438415527344, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.3135725259780884, "rewards/margins": 2.851120710372925, "rewards/rejected": -2.537548542022705, "step": 11750 }, { "epoch": 0.47, "learning_rate": 3.185737282607867e-06, "logits/chosen": -3.0249295234680176, "logits/rejected": -3.0559799671173096, "logps/chosen": -0.35653743147850037, "logps/rejected": -290.57220458984375, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.3151831328868866, "rewards/margins": 2.8649728298187256, "rewards/rejected": -2.5497899055480957, "step": 11760 }, { "epoch": 0.47, "learning_rate": 3.1823798387933134e-06, "logits/chosen": -3.049769163131714, "logits/rejected": -3.0780651569366455, "logps/chosen": -0.23368656635284424, "logps/rejected": -293.00445556640625, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.3185974657535553, "rewards/margins": 2.88887357711792, "rewards/rejected": -2.5702760219573975, "step": 11770 }, { "epoch": 0.47, "learning_rate": 3.179021064644347e-06, "logits/chosen": -3.0271098613739014, "logits/rejected": -3.055205821990967, "logps/chosen": -3.7571308612823486, "logps/rejected": -287.71307373046875, "loss": 0.0909, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27841150760650635, "rewards/margins": 2.803370952606201, "rewards/rejected": -2.5249593257904053, "step": 11780 }, { "epoch": 0.47, "learning_rate": 3.17566096670907e-06, "logits/chosen": -2.9889655113220215, "logits/rejected": -3.0198299884796143, "logps/chosen": -0.2000828981399536, "logps/rejected": -289.8544006347656, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.3142545223236084, "rewards/margins": 2.857910633087158, "rewards/rejected": -2.54365611076355, "step": 11790 }, { "epoch": 0.47, "learning_rate": 3.1722995515381644e-06, "logits/chosen": -3.024280548095703, "logits/rejected": -3.054004669189453, "logps/chosen": -0.20770792663097382, "logps/rejected": -290.34796142578125, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3147450387477875, "rewards/margins": 2.863762378692627, "rewards/rejected": -2.5490174293518066, "step": 11800 }, { "epoch": 0.47, "eval_logits/chosen": -3.073770523071289, "eval_logits/rejected": -3.0999720096588135, "eval_logps/chosen": -0.2018321454524994, "eval_logps/rejected": -284.83892822265625, "eval_loss": 0.059977274388074875, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3145812153816223, "eval_rewards/margins": 2.8028149604797363, "eval_rewards/rejected": -2.4882335662841797, "eval_runtime": 2.5378, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 11800 }, { "epoch": 0.47, "learning_rate": 3.168936825684882e-06, "logits/chosen": -3.027028799057007, "logits/rejected": -3.055116653442383, "logps/chosen": -5.222654342651367, "logps/rejected": -286.5923767089844, "loss": 0.0891, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26102370023727417, "rewards/margins": 2.7797420024871826, "rewards/rejected": -2.5187182426452637, "step": 11810 }, { "epoch": 0.47, "learning_rate": 3.1655727957050286e-06, "logits/chosen": -3.055684804916382, "logits/rejected": -3.0836498737335205, "logps/chosen": -2.60011625289917, "logps/rejected": -289.65814208984375, "loss": 0.0637, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28961291909217834, "rewards/margins": 2.8379359245300293, "rewards/rejected": -2.548323154449463, "step": 11820 }, { "epoch": 0.47, "learning_rate": 3.162207468156952e-06, "logits/chosen": -3.012568473815918, "logits/rejected": -3.0417637825012207, "logps/chosen": -0.20593750476837158, "logps/rejected": -290.8602600097656, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3159688115119934, "rewards/margins": 2.8655765056610107, "rewards/rejected": -2.549607753753662, "step": 11830 }, { "epoch": 0.47, "learning_rate": 3.1588408496015323e-06, "logits/chosen": -3.0430264472961426, "logits/rejected": -3.0722126960754395, "logps/chosen": -2.4419684410095215, "logps/rejected": -289.7901306152344, "loss": 0.0624, "rewards/accuracies": 1.0, "rewards/chosen": 0.292267769575119, "rewards/margins": 2.836458683013916, "rewards/rejected": -2.5441908836364746, "step": 11840 }, { "epoch": 0.47, "learning_rate": 3.155472946602162e-06, "logits/chosen": -3.0274155139923096, "logits/rejected": -3.055959701538086, "logps/chosen": -2.022535562515259, "logps/rejected": -290.3167724609375, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": 0.2996026575565338, "rewards/margins": 2.8454346656799316, "rewards/rejected": -2.545832395553589, "step": 11850 }, { "epoch": 0.47, "learning_rate": 3.152103765724743e-06, "logits/chosen": -3.0260519981384277, "logits/rejected": -3.054384231567383, "logps/chosen": -7.323905944824219, "logps/rejected": -284.5267333984375, "loss": 0.1265, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24317672848701477, "rewards/margins": 2.7348437309265137, "rewards/rejected": -2.491666793823242, "step": 11860 }, { "epoch": 0.47, "learning_rate": 3.1487333135376635e-06, "logits/chosen": -3.0246710777282715, "logits/rejected": -3.0542185306549072, "logps/chosen": -6.036604404449463, "logps/rejected": -284.85552978515625, "loss": 0.103, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2575143277645111, "rewards/margins": 2.753882884979248, "rewards/rejected": -2.496368646621704, "step": 11870 }, { "epoch": 0.48, "learning_rate": 3.1453615966117945e-06, "logits/chosen": -3.0015480518341064, "logits/rejected": -3.031167507171631, "logps/chosen": -0.2286984622478485, "logps/rejected": -292.32061767578125, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.31408801674842834, "rewards/margins": 2.882298231124878, "rewards/rejected": -2.5682101249694824, "step": 11880 }, { "epoch": 0.48, "learning_rate": 3.14198862152047e-06, "logits/chosen": -3.024257183074951, "logits/rejected": -3.0552115440368652, "logps/chosen": -1.421099066734314, "logps/rejected": -292.2080078125, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.3047340214252472, "rewards/margins": 2.8727245330810547, "rewards/rejected": -2.5679900646209717, "step": 11890 }, { "epoch": 0.48, "learning_rate": 3.1386143948394764e-06, "logits/chosen": -3.0397467613220215, "logits/rejected": -3.069547414779663, "logps/chosen": -0.31514835357666016, "logps/rejected": -291.22265625, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.31477090716362, "rewards/margins": 2.8711585998535156, "rewards/rejected": -2.5563879013061523, "step": 11900 }, { "epoch": 0.48, "eval_logits/chosen": -3.071845531463623, "eval_logits/rejected": -3.0985734462738037, "eval_logps/chosen": -0.17533859610557556, "eval_logps/rejected": -284.5624084472656, "eval_loss": 0.06010793522000313, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31484612822532654, "eval_rewards/margins": 2.80031418800354, "eval_rewards/rejected": -2.4854681491851807, "eval_runtime": 2.543, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 11900 }, { "epoch": 0.48, "learning_rate": 3.135238923147043e-06, "logits/chosen": -3.0319907665252686, "logits/rejected": -3.063153028488159, "logps/chosen": -0.2122180461883545, "logps/rejected": -291.57122802734375, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3162221610546112, "rewards/margins": 2.8759570121765137, "rewards/rejected": -2.55973482131958, "step": 11910 }, { "epoch": 0.48, "learning_rate": 3.1318622130238237e-06, "logits/chosen": -3.003269672393799, "logits/rejected": -3.0342390537261963, "logps/chosen": -2.2999460697174072, "logps/rejected": -290.06756591796875, "loss": 0.0619, "rewards/accuracies": 1.0, "rewards/chosen": 0.29215186834335327, "rewards/margins": 2.8410935401916504, "rewards/rejected": -2.5489418506622314, "step": 11920 }, { "epoch": 0.48, "learning_rate": 3.1284842710528875e-06, "logits/chosen": -3.011226177215576, "logits/rejected": -3.042442798614502, "logps/chosen": -0.17783084511756897, "logps/rejected": -290.1602783203125, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149893879890442, "rewards/margins": 2.862027883529663, "rewards/rejected": -2.5470385551452637, "step": 11930 }, { "epoch": 0.48, "learning_rate": 3.1251051038197055e-06, "logits/chosen": -3.00681734085083, "logits/rejected": -3.0365874767303467, "logps/chosen": -0.4209270477294922, "logps/rejected": -291.3616027832031, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3117583990097046, "rewards/margins": 2.8725485801696777, "rewards/rejected": -2.560790538787842, "step": 11940 }, { "epoch": 0.48, "learning_rate": 3.121724717912138e-06, "logits/chosen": -3.0398595333099365, "logits/rejected": -3.0669054985046387, "logps/chosen": -10.10751724243164, "logps/rejected": -280.5904235839844, "loss": 0.1496, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21577386558055878, "rewards/margins": 2.6688437461853027, "rewards/rejected": -2.4530696868896484, "step": 11950 }, { "epoch": 0.48, "learning_rate": 3.118343119920418e-06, "logits/chosen": -3.0325756072998047, "logits/rejected": -3.0611231327056885, "logps/chosen": -0.22382810711860657, "logps/rejected": -292.823486328125, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.31536865234375, "rewards/margins": 2.8878731727600098, "rewards/rejected": -2.572504758834839, "step": 11960 }, { "epoch": 0.48, "learning_rate": 3.1149603164371455e-06, "logits/chosen": -3.0332345962524414, "logits/rejected": -3.062079906463623, "logps/chosen": -5.634451389312744, "logps/rejected": -287.0580139160156, "loss": 0.0947, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.261811763048172, "rewards/margins": 2.7766246795654297, "rewards/rejected": -2.51481294631958, "step": 11970 }, { "epoch": 0.48, "learning_rate": 3.1115763140572686e-06, "logits/chosen": -3.0112102031707764, "logits/rejected": -3.0414116382598877, "logps/chosen": -0.21738263964653015, "logps/rejected": -293.73583984375, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/chosen": 0.3158429265022278, "rewards/margins": 2.8960328102111816, "rewards/rejected": -2.5801894664764404, "step": 11980 }, { "epoch": 0.48, "learning_rate": 3.1081911193780734e-06, "logits/chosen": -3.0239810943603516, "logits/rejected": -3.053553581237793, "logps/chosen": -6.41326904296875, "logps/rejected": -282.62884521484375, "loss": 0.1086, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2538760304450989, "rewards/margins": 2.7265467643737793, "rewards/rejected": -2.4726715087890625, "step": 11990 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -3.0285122394561768, "logits/rejected": -3.0568172931671143, "logps/chosen": -3.7166945934295654, "logps/rejected": -286.28778076171875, "loss": 0.0913, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27894702553749084, "rewards/margins": 2.786328077316284, "rewards/rejected": -2.507380962371826, "step": 12000 }, { "epoch": 0.48, "eval_logits/chosen": -3.073049306869507, "eval_logits/rejected": -3.099031686782837, "eval_logps/chosen": -0.1494881808757782, "eval_logps/rejected": -284.7489929199219, "eval_loss": 0.059950102120637894, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151046335697174, "eval_rewards/margins": 2.802438735961914, "eval_rewards/rejected": -2.4873340129852295, "eval_runtime": 2.542, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 12000 }, { "epoch": 0.48, "learning_rate": 3.1014171795224794e-06, "logits/chosen": -3.013078212738037, "logits/rejected": -3.0405561923980713, "logps/chosen": -9.888763427734375, "logps/rejected": -282.6634216308594, "loss": 0.146, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21687690913677216, "rewards/margins": 2.6935207843780518, "rewards/rejected": -2.4766438007354736, "step": 12010 }, { "epoch": 0.48, "learning_rate": 3.0980284475522238e-06, "logits/chosen": -3.021641254425049, "logits/rejected": -3.0489883422851562, "logps/chosen": -7.237156867980957, "logps/rejected": -284.61029052734375, "loss": 0.1031, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24443241953849792, "rewards/margins": 2.737574577331543, "rewards/rejected": -2.4931418895721436, "step": 12020 }, { "epoch": 0.48, "learning_rate": 3.094638549694908e-06, "logits/chosen": -3.017669439315796, "logits/rejected": -3.046204090118408, "logps/chosen": -3.290562391281128, "logps/rejected": -287.40020751953125, "loss": 0.0859, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28783878684043884, "rewards/margins": 2.804323196411133, "rewards/rejected": -2.516484498977661, "step": 12030 }, { "epoch": 0.48, "learning_rate": 3.0912474925593124e-06, "logits/chosen": -3.0270895957946777, "logits/rejected": -3.0574874877929688, "logps/chosen": -3.4783260822296143, "logps/rejected": -286.10858154296875, "loss": 0.0872, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28157567977905273, "rewards/margins": 2.7870917320251465, "rewards/rejected": -2.5055160522460938, "step": 12040 }, { "epoch": 0.48, "learning_rate": 3.087855282756475e-06, "logits/chosen": -3.012429714202881, "logits/rejected": -3.040393352508545, "logps/chosen": -0.22478017210960388, "logps/rejected": -292.87750244140625, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149457275867462, "rewards/margins": 2.8886804580688477, "rewards/rejected": -2.573734760284424, "step": 12050 }, { "epoch": 0.48, "learning_rate": 3.0844619268996845e-06, "logits/chosen": -3.0329689979553223, "logits/rejected": -3.0602054595947266, "logps/chosen": -3.437058210372925, "logps/rejected": -286.70855712890625, "loss": 0.089, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2826361060142517, "rewards/margins": 2.79171085357666, "rewards/rejected": -2.5090749263763428, "step": 12060 }, { "epoch": 0.48, "learning_rate": 3.0810674316044602e-06, "logits/chosen": -3.0269126892089844, "logits/rejected": -3.055361270904541, "logps/chosen": -2.0208611488342285, "logps/rejected": -288.743896484375, "loss": 0.0638, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.297080397605896, "rewards/margins": 2.831084728240967, "rewards/rejected": -2.5340046882629395, "step": 12070 }, { "epoch": 0.48, "learning_rate": 3.0776718034885454e-06, "logits/chosen": -3.004157543182373, "logits/rejected": -3.032280445098877, "logps/chosen": -3.0861411094665527, "logps/rejected": -287.7037048339844, "loss": 0.0843, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2870631515979767, "rewards/margins": 2.809492826461792, "rewards/rejected": -2.522429943084717, "step": 12080 }, { "epoch": 0.48, "learning_rate": 3.074275049171889e-06, "logits/chosen": -3.0236806869506836, "logits/rejected": -3.0503883361816406, "logps/chosen": -0.18012993037700653, "logps/rejected": -289.8265686035156, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31439733505249023, "rewards/margins": 2.8575387001037598, "rewards/rejected": -2.5431416034698486, "step": 12090 }, { "epoch": 0.48, "learning_rate": 3.0708771752766397e-06, "logits/chosen": -3.013725519180298, "logits/rejected": -3.042400360107422, "logps/chosen": -5.988914489746094, "logps/rejected": -283.5355529785156, "loss": 0.1144, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25813519954681396, "rewards/margins": 2.737657308578491, "rewards/rejected": -2.479522228240967, "step": 12100 }, { "epoch": 0.48, "eval_logits/chosen": -3.0693249702453613, "eval_logits/rejected": -3.0944936275482178, "eval_logps/chosen": -0.15640708804130554, "eval_logps/rejected": -284.5059509277344, "eval_loss": 0.06018466874957085, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31503549218177795, "eval_rewards/margins": 2.799938917160034, "eval_rewards/rejected": -2.4849038124084473, "eval_runtime": 2.5338, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 12100 }, { "epoch": 0.48, "learning_rate": 3.0674781884271256e-06, "logits/chosen": -3.0158817768096924, "logits/rejected": -3.045732021331787, "logps/chosen": -2.8543765544891357, "logps/rejected": -289.4032287597656, "loss": 0.0814, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2873151898384094, "rewards/margins": 2.8287129402160645, "rewards/rejected": -2.5413978099823, "step": 12110 }, { "epoch": 0.48, "learning_rate": 3.064078095249844e-06, "logits/chosen": -3.0223066806793213, "logits/rejected": -3.0488381385803223, "logps/chosen": -2.709111213684082, "logps/rejected": -289.69512939453125, "loss": 0.0693, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29057401418685913, "rewards/margins": 2.8329129219055176, "rewards/rejected": -2.5423390865325928, "step": 12120 }, { "epoch": 0.49, "learning_rate": 3.0606769023734535e-06, "logits/chosen": -3.015336513519287, "logits/rejected": -3.0434083938598633, "logps/chosen": -6.834606170654297, "logps/rejected": -284.2818908691406, "loss": 0.1217, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2492276430130005, "rewards/margins": 2.737258195877075, "rewards/rejected": -2.488030195236206, "step": 12130 }, { "epoch": 0.49, "learning_rate": 3.0572746164287513e-06, "logits/chosen": -3.039102792739868, "logits/rejected": -3.0670015811920166, "logps/chosen": -2.2970240116119385, "logps/rejected": -292.7451171875, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.2959497570991516, "rewards/margins": 2.8674519062042236, "rewards/rejected": -2.5715019702911377, "step": 12140 }, { "epoch": 0.49, "learning_rate": 3.053871244048669e-06, "logits/chosen": -3.0142135620117188, "logits/rejected": -3.0421481132507324, "logps/chosen": -5.921875, "logps/rejected": -285.45684814453125, "loss": 0.0989, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.25703689455986023, "rewards/margins": 2.760010242462158, "rewards/rejected": -2.5029730796813965, "step": 12150 }, { "epoch": 0.49, "learning_rate": 3.050466791868254e-06, "logits/chosen": -3.0376803874969482, "logits/rejected": -3.0680453777313232, "logps/chosen": -5.244297504425049, "logps/rejected": -287.54541015625, "loss": 0.0938, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26662391424179077, "rewards/margins": 2.7872776985168457, "rewards/rejected": -2.52065372467041, "step": 12160 }, { "epoch": 0.49, "learning_rate": 3.047061266524662e-06, "logits/chosen": -3.0196797847747803, "logits/rejected": -3.0513904094696045, "logps/chosen": -0.13604195415973663, "logps/rejected": -292.9606628417969, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.31616753339767456, "rewards/margins": 2.8897147178649902, "rewards/rejected": -2.573547124862671, "step": 12170 }, { "epoch": 0.49, "learning_rate": 3.0436546746571374e-06, "logits/chosen": -3.028926372528076, "logits/rejected": -3.05755615234375, "logps/chosen": -1.414974331855774, "logps/rejected": -287.766357421875, "loss": 0.0676, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3012292981147766, "rewards/margins": 2.8282880783081055, "rewards/rejected": -2.5270590782165527, "step": 12180 }, { "epoch": 0.49, "learning_rate": 3.0402470229070057e-06, "logits/chosen": -3.0204200744628906, "logits/rejected": -3.051314115524292, "logps/chosen": -0.17587456107139587, "logps/rejected": -292.0237731933594, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.31798702478408813, "rewards/margins": 2.8803234100341797, "rewards/rejected": -2.5623364448547363, "step": 12190 }, { "epoch": 0.49, "learning_rate": 3.0368383179176584e-06, "logits/chosen": -3.0195956230163574, "logits/rejected": -3.050856113433838, "logps/chosen": -3.7253470420837402, "logps/rejected": -288.04119873046875, "loss": 0.0911, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2782062292098999, "rewards/margins": 2.8037474155426025, "rewards/rejected": -2.525541067123413, "step": 12200 }, { "epoch": 0.49, "eval_logits/chosen": -3.071559190750122, "eval_logits/rejected": -3.099240303039551, "eval_logps/chosen": -0.14721126854419708, "eval_logps/rejected": -284.34661865234375, "eval_loss": 0.060278140008449554, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.315127432346344, "eval_rewards/margins": 2.7984375953674316, "eval_rewards/rejected": -2.4833102226257324, "eval_runtime": 2.544, "eval_samples_per_second": 1.965, "eval_steps_per_second": 0.393, "step": 12200 }, { "epoch": 0.49, "learning_rate": 3.0334285663345404e-06, "logits/chosen": -3.030733585357666, "logits/rejected": -3.058483839035034, "logps/chosen": -6.384877681732178, "logps/rejected": -282.15155029296875, "loss": 0.1193, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2508518099784851, "rewards/margins": 2.722104787826538, "rewards/rejected": -2.471252918243408, "step": 12210 }, { "epoch": 0.49, "learning_rate": 3.0300177748051375e-06, "logits/chosen": -3.0311832427978516, "logits/rejected": -3.0573413372039795, "logps/chosen": -3.831557035446167, "logps/rejected": -286.45208740234375, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2780613899230957, "rewards/margins": 2.7842109203338623, "rewards/rejected": -2.5061497688293457, "step": 12220 }, { "epoch": 0.49, "learning_rate": 3.0266059499789603e-06, "logits/chosen": -3.045969247817993, "logits/rejected": -3.072279453277588, "logps/chosen": -6.140406131744385, "logps/rejected": -283.9040832519531, "loss": 0.1154, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2561575472354889, "rewards/margins": 2.7426376342773438, "rewards/rejected": -2.486480236053467, "step": 12230 }, { "epoch": 0.49, "learning_rate": 3.023193098507538e-06, "logits/chosen": -3.0049214363098145, "logits/rejected": -3.0358314514160156, "logps/chosen": -1.5112888813018799, "logps/rejected": -285.9508972167969, "loss": 0.0689, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30156806111335754, "rewards/margins": 2.808593273162842, "rewards/rejected": -2.5070252418518066, "step": 12240 }, { "epoch": 0.49, "learning_rate": 3.019779227044398e-06, "logits/chosen": -3.033867597579956, "logits/rejected": -3.064467191696167, "logps/chosen": -3.8575313091278076, "logps/rejected": -287.869384765625, "loss": 0.0916, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27670034766197205, "rewards/margins": 2.802450656890869, "rewards/rejected": -2.525750160217285, "step": 12250 }, { "epoch": 0.49, "learning_rate": 3.016364342245059e-06, "logits/chosen": -3.035456895828247, "logits/rejected": -3.063525438308716, "logps/chosen": -6.599102020263672, "logps/rejected": -278.9796142578125, "loss": 0.1262, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2509657144546509, "rewards/margins": 2.689237594604492, "rewards/rejected": -2.4382717609405518, "step": 12260 }, { "epoch": 0.49, "learning_rate": 3.0129484507670114e-06, "logits/chosen": -3.0269579887390137, "logits/rejected": -3.0576443672180176, "logps/chosen": -0.16911965608596802, "logps/rejected": -291.1106872558594, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.31337565183639526, "rewards/margins": 2.868752956390381, "rewards/rejected": -2.5553770065307617, "step": 12270 }, { "epoch": 0.49, "learning_rate": 3.0095315592697126e-06, "logits/chosen": -3.0128536224365234, "logits/rejected": -3.0419955253601074, "logps/chosen": -3.787130832672119, "logps/rejected": -284.51690673828125, "loss": 0.0922, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2791876196861267, "rewards/margins": 2.7735061645507812, "rewards/rejected": -2.4943184852600098, "step": 12280 }, { "epoch": 0.49, "learning_rate": 3.006113674414565e-06, "logits/chosen": -3.0236761569976807, "logits/rejected": -3.0527946949005127, "logps/chosen": -0.3350379467010498, "logps/rejected": -287.02618408203125, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/chosen": 0.31408214569091797, "rewards/margins": 2.8295533657073975, "rewards/rejected": -2.5154709815979004, "step": 12290 }, { "epoch": 0.49, "learning_rate": 3.002694802864912e-06, "logits/chosen": -3.021885633468628, "logits/rejected": -3.050614833831787, "logps/chosen": -3.7766506671905518, "logps/rejected": -286.91998291015625, "loss": 0.091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2810974419116974, "rewards/margins": 2.7929091453552246, "rewards/rejected": -2.5118117332458496, "step": 12300 }, { "epoch": 0.49, "eval_logits/chosen": -3.0713329315185547, "eval_logits/rejected": -3.0975332260131836, "eval_logps/chosen": -0.17468950152397156, "eval_logps/rejected": -284.34906005859375, "eval_loss": 0.0603608600795269, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31485265493392944, "eval_rewards/margins": 2.798187732696533, "eval_rewards/rejected": -2.483335018157959, "eval_runtime": 2.5454, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.393, "step": 12300 }, { "epoch": 0.49, "learning_rate": 2.9992749512860177e-06, "logits/chosen": -3.031287431716919, "logits/rejected": -3.0588278770446777, "logps/chosen": -2.986020565032959, "logps/rejected": -285.96295166015625, "loss": 0.0846, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28761881589889526, "rewards/margins": 2.7901225090026855, "rewards/rejected": -2.5025038719177246, "step": 12310 }, { "epoch": 0.49, "learning_rate": 2.9958541263450586e-06, "logits/chosen": -3.030862808227539, "logits/rejected": -3.059617519378662, "logps/chosen": -0.20407874882221222, "logps/rejected": -291.86065673828125, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.3190430700778961, "rewards/margins": 2.878918170928955, "rewards/rejected": -2.559875011444092, "step": 12320 }, { "epoch": 0.49, "learning_rate": 2.992432334711107e-06, "logits/chosen": -3.0118541717529297, "logits/rejected": -3.0395476818084717, "logps/chosen": -0.22196073830127716, "logps/rejected": -292.30670166015625, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.31717079877853394, "rewards/margins": 2.8824758529663086, "rewards/rejected": -2.565304756164551, "step": 12330 }, { "epoch": 0.49, "learning_rate": 2.989009583055121e-06, "logits/chosen": -3.00820255279541, "logits/rejected": -3.0376715660095215, "logps/chosen": -2.997225761413574, "logps/rejected": -287.4920959472656, "loss": 0.084, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2870819866657257, "rewards/margins": 2.8067257404327393, "rewards/rejected": -2.519643545150757, "step": 12340 }, { "epoch": 0.49, "learning_rate": 2.98558587804993e-06, "logits/chosen": -3.0271143913269043, "logits/rejected": -3.0578322410583496, "logps/chosen": -0.32654839754104614, "logps/rejected": -289.45745849609375, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": 0.31332409381866455, "rewards/margins": 2.852658748626709, "rewards/rejected": -2.539334774017334, "step": 12350 }, { "epoch": 0.49, "learning_rate": 2.9821612263702226e-06, "logits/chosen": -3.054115056991577, "logits/rejected": -3.082730293273926, "logps/chosen": -0.2117171287536621, "logps/rejected": -290.4706115722656, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.31351107358932495, "rewards/margins": 2.8654608726501465, "rewards/rejected": -2.5519497394561768, "step": 12360 }, { "epoch": 0.49, "learning_rate": 2.9787356346925327e-06, "logits/chosen": -3.020434617996216, "logits/rejected": -3.049307346343994, "logps/chosen": -0.20225724577903748, "logps/rejected": -291.2893371582031, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.31409892439842224, "rewards/margins": 2.869295835494995, "rewards/rejected": -2.555197238922119, "step": 12370 }, { "epoch": 0.5, "learning_rate": 2.9753091096952256e-06, "logits/chosen": -3.0379230976104736, "logits/rejected": -3.0648882389068604, "logps/chosen": -5.754713535308838, "logps/rejected": -282.60333251953125, "loss": 0.1129, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2580723464488983, "rewards/margins": 2.7335963249206543, "rewards/rejected": -2.475524425506592, "step": 12380 }, { "epoch": 0.5, "learning_rate": 2.9718816580584885e-06, "logits/chosen": -3.018341541290283, "logits/rejected": -3.0463457107543945, "logps/chosen": -3.3033385276794434, "logps/rejected": -286.16680908203125, "loss": 0.0877, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2817308306694031, "rewards/margins": 2.7934112548828125, "rewards/rejected": -2.5116801261901855, "step": 12390 }, { "epoch": 0.5, "learning_rate": 2.9684532864643123e-06, "logits/chosen": -3.0146968364715576, "logits/rejected": -3.0456912517547607, "logps/chosen": -0.18609683215618134, "logps/rejected": -292.163818359375, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150157332420349, "rewards/margins": 2.882652997970581, "rewards/rejected": -2.5676376819610596, "step": 12400 }, { "epoch": 0.5, "eval_logits/chosen": -3.07039475440979, "eval_logits/rejected": -3.0971312522888184, "eval_logps/chosen": -0.22635135054588318, "eval_logps/rejected": -284.1383056640625, "eval_loss": 0.060550056397914886, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31433600187301636, "eval_rewards/margins": 2.795563220977783, "eval_rewards/rejected": -2.481227397918701, "eval_runtime": 2.5338, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 12400 }, { "epoch": 0.5, "learning_rate": 2.9650240015964824e-06, "logits/chosen": -3.0044384002685547, "logits/rejected": -3.035176992416382, "logps/chosen": -0.5454715490341187, "logps/rejected": -289.7579650878906, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.31114986538887024, "rewards/margins": 2.8525426387786865, "rewards/rejected": -2.541393280029297, "step": 12410 }, { "epoch": 0.5, "learning_rate": 2.9615938101405676e-06, "logits/chosen": -3.0081868171691895, "logits/rejected": -3.038576602935791, "logps/chosen": -3.555173397064209, "logps/rejected": -285.89105224609375, "loss": 0.0877, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2795322835445404, "rewards/margins": 2.788208246231079, "rewards/rejected": -2.508676290512085, "step": 12420 }, { "epoch": 0.5, "learning_rate": 2.9581627187838997e-06, "logits/chosen": -2.981274127960205, "logits/rejected": -3.0149714946746826, "logps/chosen": -0.2640033960342407, "logps/rejected": -291.28387451171875, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31201520562171936, "rewards/margins": 2.873403310775757, "rewards/rejected": -2.5613884925842285, "step": 12430 }, { "epoch": 0.5, "learning_rate": 2.9547307342155675e-06, "logits/chosen": -2.996370553970337, "logits/rejected": -3.0256896018981934, "logps/chosen": -2.3725473880767822, "logps/rejected": -286.75225830078125, "loss": 0.0641, "rewards/accuracies": 1.0, "rewards/chosen": 0.29035285115242004, "rewards/margins": 2.8067917823791504, "rewards/rejected": -2.5164389610290527, "step": 12440 }, { "epoch": 0.5, "learning_rate": 2.9512978631264006e-06, "logits/chosen": -3.019047260284424, "logits/rejected": -3.0484890937805176, "logps/chosen": -0.20152607560157776, "logps/rejected": -291.24871826171875, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.31151485443115234, "rewards/margins": 2.8756980895996094, "rewards/rejected": -2.564182996749878, "step": 12450 }, { "epoch": 0.5, "learning_rate": 2.9478641122089563e-06, "logits/chosen": -3.0348339080810547, "logits/rejected": -3.063213348388672, "logps/chosen": -0.18475277721881866, "logps/rejected": -290.3617248535156, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3128190040588379, "rewards/margins": 2.8699119091033936, "rewards/rejected": -2.5570926666259766, "step": 12460 }, { "epoch": 0.5, "learning_rate": 2.9444294881575083e-06, "logits/chosen": -3.0143840312957764, "logits/rejected": -3.0459041595458984, "logps/chosen": -0.4577637314796448, "logps/rejected": -290.2486877441406, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3098711371421814, "rewards/margins": 2.8630900382995605, "rewards/rejected": -2.5532188415527344, "step": 12470 }, { "epoch": 0.5, "learning_rate": 2.940993997668031e-06, "logits/chosen": -3.034745454788208, "logits/rejected": -3.062242031097412, "logps/chosen": -0.18141482770442963, "logps/rejected": -290.77947998046875, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3158700466156006, "rewards/margins": 2.8687427043914795, "rewards/rejected": -2.552872896194458, "step": 12480 }, { "epoch": 0.5, "learning_rate": 2.9375576474381907e-06, "logits/chosen": -3.013375759124756, "logits/rejected": -3.045330762863159, "logps/chosen": -0.17400357127189636, "logps/rejected": -290.84942626953125, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.31194087862968445, "rewards/margins": 2.866307497024536, "rewards/rejected": -2.5543665885925293, "step": 12490 }, { "epoch": 0.5, "learning_rate": 2.9341204441673267e-06, "logits/chosen": -3.016409158706665, "logits/rejected": -3.046581268310547, "logps/chosen": -0.31943368911743164, "logps/rejected": -288.06768798828125, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": 0.31422024965286255, "rewards/margins": 2.8424103260040283, "rewards/rejected": -2.5281901359558105, "step": 12500 }, { "epoch": 0.5, "eval_logits/chosen": -3.070854902267456, "eval_logits/rejected": -3.0978565216064453, "eval_logps/chosen": -0.2027381956577301, "eval_logps/rejected": -284.2022705078125, "eval_loss": 0.06044429540634155, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31457215547561646, "eval_rewards/margins": 2.7964389324188232, "eval_rewards/rejected": -2.4818668365478516, "eval_runtime": 2.5422, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 12500 }, { "epoch": 0.5, "learning_rate": 2.9306823945564422e-06, "logits/chosen": -3.0150184631347656, "logits/rejected": -3.041703701019287, "logps/chosen": -10.624709129333496, "logps/rejected": -279.87432861328125, "loss": 0.1597, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21025076508522034, "rewards/margins": 2.6574130058288574, "rewards/rejected": -2.447161912918091, "step": 12510 }, { "epoch": 0.5, "learning_rate": 2.927243505308192e-06, "logits/chosen": -3.0311572551727295, "logits/rejected": -3.060053586959839, "logps/chosen": -3.799262285232544, "logps/rejected": -286.52227783203125, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2759259343147278, "rewards/margins": 2.7873408794403076, "rewards/rejected": -2.5114150047302246, "step": 12520 }, { "epoch": 0.5, "learning_rate": 2.923803783126866e-06, "logits/chosen": -3.0013089179992676, "logits/rejected": -3.029425859451294, "logps/chosen": -9.78761100769043, "logps/rejected": -279.7231750488281, "loss": 0.1518, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22112779319286346, "rewards/margins": 2.662672758102417, "rewards/rejected": -2.441545009613037, "step": 12530 }, { "epoch": 0.5, "learning_rate": 2.920363234718379e-06, "logits/chosen": -3.022756576538086, "logits/rejected": -3.052614212036133, "logps/chosen": -0.17859306931495667, "logps/rejected": -289.5655517578125, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": 0.31399378180503845, "rewards/margins": 2.855128049850464, "rewards/rejected": -2.5411343574523926, "step": 12540 }, { "epoch": 0.5, "learning_rate": 2.9169218667902562e-06, "logits/chosen": -3.0487959384918213, "logits/rejected": -3.076213836669922, "logps/chosen": -0.7727393507957458, "logps/rejected": -291.80438232421875, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3094833493232727, "rewards/margins": 2.8742175102233887, "rewards/rejected": -2.5647339820861816, "step": 12550 }, { "epoch": 0.5, "learning_rate": 2.9134796860516194e-06, "logits/chosen": -3.0320916175842285, "logits/rejected": -3.0584802627563477, "logps/chosen": -4.016470909118652, "logps/rejected": -284.90692138671875, "loss": 0.0935, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27716076374053955, "rewards/margins": 2.772663116455078, "rewards/rejected": -2.495502233505249, "step": 12560 }, { "epoch": 0.5, "learning_rate": 2.9100366992131778e-06, "logits/chosen": -3.0304598808288574, "logits/rejected": -3.0586342811584473, "logps/chosen": -3.8387222290039062, "logps/rejected": -286.22235107421875, "loss": 0.0876, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.279192715883255, "rewards/margins": 2.7874114513397217, "rewards/rejected": -2.508218765258789, "step": 12570 }, { "epoch": 0.5, "learning_rate": 2.9065929129872097e-06, "logits/chosen": -3.0171477794647217, "logits/rejected": -3.043778896331787, "logps/chosen": -13.141705513000488, "logps/rejected": -276.35968017578125, "loss": 0.1786, "rewards/accuracies": 0.9375, "rewards/chosen": 0.18926644325256348, "rewards/margins": 2.596294403076172, "rewards/rejected": -2.4070279598236084, "step": 12580 }, { "epoch": 0.5, "learning_rate": 2.9031483340875523e-06, "logits/chosen": -3.014371395111084, "logits/rejected": -3.0440673828125, "logps/chosen": -0.32210907340049744, "logps/rejected": -287.37579345703125, "loss": 0.061, "rewards/accuracies": 1.0, "rewards/chosen": 0.31181222200393677, "rewards/margins": 2.829604387283325, "rewards/rejected": -2.517792224884033, "step": 12590 }, { "epoch": 0.5, "learning_rate": 2.8997029692295875e-06, "logits/chosen": -3.025683879852295, "logits/rejected": -3.0538101196289062, "logps/chosen": -0.18350845575332642, "logps/rejected": -290.5189514160156, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.31306737661361694, "rewards/margins": 2.862333297729492, "rewards/rejected": -2.5492660999298096, "step": 12600 }, { "epoch": 0.5, "eval_logits/chosen": -3.073965072631836, "eval_logits/rejected": -3.0984158515930176, "eval_logps/chosen": -0.17771558463573456, "eval_logps/rejected": -284.52728271484375, "eval_loss": 0.06012387201189995, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31482237577438354, "eval_rewards/margins": 2.7999396324157715, "eval_rewards/rejected": -2.4851174354553223, "eval_runtime": 2.5293, "eval_samples_per_second": 1.977, "eval_steps_per_second": 0.395, "step": 12600 }, { "epoch": 0.5, "learning_rate": 2.8962568251302327e-06, "logits/chosen": -3.0218236446380615, "logits/rejected": -3.0485644340515137, "logps/chosen": -2.8474414348602295, "logps/rejected": -284.6510314941406, "loss": 0.0838, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28995051980018616, "rewards/margins": 2.7778525352478027, "rewards/rejected": -2.4879019260406494, "step": 12610 }, { "epoch": 0.5, "learning_rate": 2.89280990850792e-06, "logits/chosen": -3.0172553062438965, "logits/rejected": -3.0430328845977783, "logps/chosen": -3.929473400115967, "logps/rejected": -283.7809753417969, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.277828186750412, "rewards/margins": 2.7612595558166504, "rewards/rejected": -2.483431339263916, "step": 12620 }, { "epoch": 0.51, "learning_rate": 2.8893622260825906e-06, "logits/chosen": -2.997440814971924, "logits/rejected": -3.029754638671875, "logps/chosen": -2.349987506866455, "logps/rejected": -288.73907470703125, "loss": 0.0701, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29424625635147095, "rewards/margins": 2.825817584991455, "rewards/rejected": -2.53157114982605, "step": 12630 }, { "epoch": 0.51, "learning_rate": 2.8859137845756785e-06, "logits/chosen": -3.019627332687378, "logits/rejected": -3.048434019088745, "logps/chosen": -3.183488368988037, "logps/rejected": -288.97076416015625, "loss": 0.0847, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28585946559906006, "rewards/margins": 2.8204150199890137, "rewards/rejected": -2.5345559120178223, "step": 12640 }, { "epoch": 0.51, "learning_rate": 2.8824645907100957e-06, "logits/chosen": -3.020537853240967, "logits/rejected": -3.048430919647217, "logps/chosen": -3.8314247131347656, "logps/rejected": -288.6873779296875, "loss": 0.0909, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27852994203567505, "rewards/margins": 2.8096764087677, "rewards/rejected": -2.53114652633667, "step": 12650 }, { "epoch": 0.51, "learning_rate": 2.8790146512102228e-06, "logits/chosen": -2.998187303543091, "logits/rejected": -3.0252909660339355, "logps/chosen": -2.498192548751831, "logps/rejected": -287.28009033203125, "loss": 0.0676, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29228296875953674, "rewards/margins": 2.8080999851226807, "rewards/rejected": -2.5158169269561768, "step": 12660 }, { "epoch": 0.51, "learning_rate": 2.875563972801893e-06, "logits/chosen": -3.03013277053833, "logits/rejected": -3.057163715362549, "logps/chosen": -0.23172800242900848, "logps/rejected": -292.1220397949219, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148132860660553, "rewards/margins": 2.879146099090576, "rewards/rejected": -2.564332962036133, "step": 12670 }, { "epoch": 0.51, "learning_rate": 2.872112562212381e-06, "logits/chosen": -2.995379686355591, "logits/rejected": -3.024959087371826, "logps/chosen": -2.5006980895996094, "logps/rejected": -288.3140563964844, "loss": 0.0744, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29018160700798035, "rewards/margins": 2.821589708328247, "rewards/rejected": -2.5314080715179443, "step": 12680 }, { "epoch": 0.51, "learning_rate": 2.868660426170388e-06, "logits/chosen": -3.038210868835449, "logits/rejected": -3.065674066543579, "logps/chosen": -0.24462850391864777, "logps/rejected": -294.2286071777344, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149487376213074, "rewards/margins": 2.9016664028167725, "rewards/rejected": -2.5867176055908203, "step": 12690 }, { "epoch": 0.51, "learning_rate": 2.8652075714060296e-06, "logits/chosen": -3.031324863433838, "logits/rejected": -3.0582642555236816, "logps/chosen": -9.202812194824219, "logps/rejected": -284.999755859375, "loss": 0.1175, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2227802723646164, "rewards/margins": 2.723576068878174, "rewards/rejected": -2.500795364379883, "step": 12700 }, { "epoch": 0.51, "eval_logits/chosen": -3.073460578918457, "eval_logits/rejected": -3.095227003097534, "eval_logps/chosen": -0.200768381357193, "eval_logps/rejected": -284.70025634765625, "eval_loss": 0.060075581073760986, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3145918548107147, "eval_rewards/margins": 2.801438808441162, "eval_rewards/rejected": -2.486846685409546, "eval_runtime": 2.5506, "eval_samples_per_second": 1.96, "eval_steps_per_second": 0.392, "step": 12700 }, { "epoch": 0.51, "learning_rate": 2.861754004650823e-06, "logits/chosen": -3.0434410572052, "logits/rejected": -3.0675950050354004, "logps/chosen": -1.2026950120925903, "logps/rejected": -290.5492248535156, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.30475515127182007, "rewards/margins": 2.859707832336426, "rewards/rejected": -2.554952621459961, "step": 12710 }, { "epoch": 0.51, "learning_rate": 2.8582997326376736e-06, "logits/chosen": -2.997511625289917, "logits/rejected": -3.030632972717285, "logps/chosen": -3.56231951713562, "logps/rejected": -288.016357421875, "loss": 0.0888, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.282087117433548, "rewards/margins": 2.8080568313598633, "rewards/rejected": -2.525969982147217, "step": 12720 }, { "epoch": 0.51, "learning_rate": 2.8548447621008614e-06, "logits/chosen": -3.016772747039795, "logits/rejected": -3.0500435829162598, "logps/chosen": -0.3992824852466583, "logps/rejected": -291.0509948730469, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.314219206571579, "rewards/margins": 2.871349811553955, "rewards/rejected": -2.557130813598633, "step": 12730 }, { "epoch": 0.51, "learning_rate": 2.8513890997760273e-06, "logits/chosen": -3.0381293296813965, "logits/rejected": -3.068425416946411, "logps/chosen": -0.17488528788089752, "logps/rejected": -290.5989685058594, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.31399139761924744, "rewards/margins": 2.8655786514282227, "rewards/rejected": -2.5515871047973633, "step": 12740 }, { "epoch": 0.51, "learning_rate": 2.847932752400164e-06, "logits/chosen": -3.0027575492858887, "logits/rejected": -3.0344760417938232, "logps/chosen": -0.13959349691867828, "logps/rejected": -289.8493347167969, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.31519120931625366, "rewards/margins": 2.855835437774658, "rewards/rejected": -2.540644407272339, "step": 12750 }, { "epoch": 0.51, "learning_rate": 2.844475726711595e-06, "logits/chosen": -3.0108747482299805, "logits/rejected": -3.0397143363952637, "logps/chosen": -6.208165168762207, "logps/rejected": -280.9454345703125, "loss": 0.1173, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2540012001991272, "rewards/margins": 2.7103893756866455, "rewards/rejected": -2.456387996673584, "step": 12760 }, { "epoch": 0.51, "learning_rate": 2.841018029449971e-06, "logits/chosen": -3.0205979347229004, "logits/rejected": -3.0537915229797363, "logps/chosen": -0.2895019054412842, "logps/rejected": -290.6286926269531, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149566352367401, "rewards/margins": 2.8655803203582764, "rewards/rejected": -2.5506234169006348, "step": 12770 }, { "epoch": 0.51, "learning_rate": 2.837559667356248e-06, "logits/chosen": -3.0319905281066895, "logits/rejected": -3.0624840259552, "logps/chosen": -7.068246364593506, "logps/rejected": -286.89251708984375, "loss": 0.1229, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24808605015277863, "rewards/margins": 2.763469934463501, "rewards/rejected": -2.5153839588165283, "step": 12780 }, { "epoch": 0.51, "learning_rate": 2.8341006471726817e-06, "logits/chosen": -3.009272336959839, "logits/rejected": -3.04179048538208, "logps/chosen": -0.18166743218898773, "logps/rejected": -288.9847717285156, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.31321972608566284, "rewards/margins": 2.8517282009124756, "rewards/rejected": -2.538508653640747, "step": 12790 }, { "epoch": 0.51, "learning_rate": 2.8306409756428067e-06, "logits/chosen": -3.01141357421875, "logits/rejected": -3.0411293506622314, "logps/chosen": -2.1956639289855957, "logps/rejected": -286.8332214355469, "loss": 0.0714, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29515013098716736, "rewards/margins": 2.810727596282959, "rewards/rejected": -2.5155773162841797, "step": 12800 }, { "epoch": 0.51, "eval_logits/chosen": -3.071413040161133, "eval_logits/rejected": -3.0964226722717285, "eval_logps/chosen": -0.14188732206821442, "eval_logps/rejected": -284.1912536621094, "eval_loss": 0.06049853563308716, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151806890964508, "eval_rewards/margins": 2.796937942504883, "eval_rewards/rejected": -2.481757164001465, "eval_runtime": 2.5401, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 12800 }, { "epoch": 0.51, "learning_rate": 2.827180659511431e-06, "logits/chosen": -3.010267734527588, "logits/rejected": -3.0403523445129395, "logps/chosen": -3.2750403881073, "logps/rejected": -286.7343444824219, "loss": 0.0867, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28497403860092163, "rewards/margins": 2.798521041870117, "rewards/rejected": -2.513546943664551, "step": 12810 }, { "epoch": 0.51, "learning_rate": 2.8237197055246175e-06, "logits/chosen": -3.0125904083251953, "logits/rejected": -3.044084072113037, "logps/chosen": -0.17893746495246887, "logps/rejected": -292.0625915527344, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.31354770064353943, "rewards/margins": 2.8797760009765625, "rewards/rejected": -2.5662283897399902, "step": 12820 }, { "epoch": 0.51, "learning_rate": 2.820258120429674e-06, "logits/chosen": -3.0342583656311035, "logits/rejected": -3.0628669261932373, "logps/chosen": -3.815314769744873, "logps/rejected": -288.8435974121094, "loss": 0.0908, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28020724654197693, "rewards/margins": 2.8151440620422363, "rewards/rejected": -2.5349369049072266, "step": 12830 }, { "epoch": 0.51, "learning_rate": 2.816795910975137e-06, "logits/chosen": -3.0085196495056152, "logits/rejected": -3.0372819900512695, "logps/chosen": -0.24038724601268768, "logps/rejected": -290.32403564453125, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3163490891456604, "rewards/margins": 2.863447666168213, "rewards/rejected": -2.5470986366271973, "step": 12840 }, { "epoch": 0.51, "learning_rate": 2.813333083910761e-06, "logits/chosen": -3.0274276733398438, "logits/rejected": -3.055173397064209, "logps/chosen": -3.767817258834839, "logps/rejected": -286.05303955078125, "loss": 0.0919, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2805179953575134, "rewards/margins": 2.783247947692871, "rewards/rejected": -2.502729892730713, "step": 12850 }, { "epoch": 0.51, "learning_rate": 2.8098696459875048e-06, "logits/chosen": -3.0118775367736816, "logits/rejected": -3.041889190673828, "logps/chosen": -3.6745998859405518, "logps/rejected": -288.10638427734375, "loss": 0.0898, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.278847336769104, "rewards/margins": 2.809251070022583, "rewards/rejected": -2.5304038524627686, "step": 12860 }, { "epoch": 0.51, "learning_rate": 2.806405603957517e-06, "logits/chosen": -3.0222654342651367, "logits/rejected": -3.050816297531128, "logps/chosen": -6.423094749450684, "logps/rejected": -284.5029602050781, "loss": 0.1184, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2529865801334381, "rewards/margins": 2.7427916526794434, "rewards/rejected": -2.489804983139038, "step": 12870 }, { "epoch": 0.52, "learning_rate": 2.802940964574127e-06, "logits/chosen": -3.0322651863098145, "logits/rejected": -3.0622575283050537, "logps/chosen": -2.5630412101745605, "logps/rejected": -285.6885681152344, "loss": 0.081, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29239457845687866, "rewards/margins": 2.7954859733581543, "rewards/rejected": -2.50309157371521, "step": 12880 }, { "epoch": 0.52, "learning_rate": 2.7994757345918244e-06, "logits/chosen": -3.018536329269409, "logits/rejected": -3.05018949508667, "logps/chosen": -3.8358852863311768, "logps/rejected": -288.45928955078125, "loss": 0.0911, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2814343571662903, "rewards/margins": 2.8094873428344727, "rewards/rejected": -2.528053045272827, "step": 12890 }, { "epoch": 0.52, "learning_rate": 2.7960099207662535e-06, "logits/chosen": -3.0242018699645996, "logits/rejected": -3.0540099143981934, "logps/chosen": -0.6067059636116028, "logps/rejected": -290.4850769042969, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": 0.3105511963367462, "rewards/margins": 2.860077381134033, "rewards/rejected": -2.5495262145996094, "step": 12900 }, { "epoch": 0.52, "eval_logits/chosen": -3.0729596614837646, "eval_logits/rejected": -3.097965717315674, "eval_logps/chosen": -0.1462632268667221, "eval_logps/rejected": -284.64495849609375, "eval_loss": 0.06003279611468315, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151369094848633, "eval_rewards/margins": 2.801431179046631, "eval_rewards/rejected": -2.4862942695617676, "eval_runtime": 2.5381, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 12900 }, { "epoch": 0.52, "learning_rate": 2.7925435298541944e-06, "logits/chosen": -3.022876262664795, "logits/rejected": -3.0518710613250732, "logps/chosen": -0.22790773212909698, "logps/rejected": -287.8687744140625, "loss": 0.062, "rewards/accuracies": 1.0, "rewards/chosen": 0.3157707750797272, "rewards/margins": 2.8371245861053467, "rewards/rejected": -2.5213539600372314, "step": 12910 }, { "epoch": 0.52, "learning_rate": 2.7890765686135545e-06, "logits/chosen": -3.0248169898986816, "logits/rejected": -3.052508592605591, "logps/chosen": -0.174183189868927, "logps/rejected": -290.2117004394531, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149590492248535, "rewards/margins": 2.8620171546936035, "rewards/rejected": -2.547058582305908, "step": 12920 }, { "epoch": 0.52, "learning_rate": 2.7856090438033522e-06, "logits/chosen": -3.013505697250366, "logits/rejected": -3.0435357093811035, "logps/chosen": -4.625129222869873, "logps/rejected": -286.1027526855469, "loss": 0.0943, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2697351574897766, "rewards/margins": 2.779953956604004, "rewards/rejected": -2.510218620300293, "step": 12930 }, { "epoch": 0.52, "learning_rate": 2.7821409621837042e-06, "logits/chosen": -3.034234046936035, "logits/rejected": -3.0619773864746094, "logps/chosen": -0.1422184258699417, "logps/rejected": -293.79595947265625, "loss": 0.0535, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149459958076477, "rewards/margins": 2.9019885063171387, "rewards/rejected": -2.5870423316955566, "step": 12940 }, { "epoch": 0.52, "learning_rate": 2.778672330515814e-06, "logits/chosen": -3.0376219749450684, "logits/rejected": -3.0677289962768555, "logps/chosen": -1.0149235725402832, "logps/rejected": -285.95697021484375, "loss": 0.0668, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30885031819343567, "rewards/margins": 2.8141961097717285, "rewards/rejected": -2.5053458213806152, "step": 12950 }, { "epoch": 0.52, "learning_rate": 2.7752031555619558e-06, "logits/chosen": -3.0050346851348877, "logits/rejected": -3.035489559173584, "logps/chosen": -0.3108625113964081, "logps/rejected": -288.1472473144531, "loss": 0.0608, "rewards/accuracies": 1.0, "rewards/chosen": 0.3117576539516449, "rewards/margins": 2.840822696685791, "rewards/rejected": -2.5290653705596924, "step": 12960 }, { "epoch": 0.52, "learning_rate": 2.7717334440854634e-06, "logits/chosen": -3.0067050457000732, "logits/rejected": -3.033841848373413, "logps/chosen": -8.683127403259277, "logps/rejected": -281.00653076171875, "loss": 0.1414, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23066261410713196, "rewards/margins": 2.6875765323638916, "rewards/rejected": -2.456913709640503, "step": 12970 }, { "epoch": 0.52, "learning_rate": 2.7682632028507166e-06, "logits/chosen": -3.012265682220459, "logits/rejected": -3.0423691272735596, "logps/chosen": -2.710472822189331, "logps/rejected": -287.0067138671875, "loss": 0.082, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2894020080566406, "rewards/margins": 2.8028762340545654, "rewards/rejected": -2.513474225997925, "step": 12980 }, { "epoch": 0.52, "learning_rate": 2.76479243862313e-06, "logits/chosen": -3.0372469425201416, "logits/rejected": -3.0664305686950684, "logps/chosen": -0.29789966344833374, "logps/rejected": -284.78533935546875, "loss": 0.0665, "rewards/accuracies": 1.0, "rewards/chosen": 0.3144097924232483, "rewards/margins": 2.808354139328003, "rewards/rejected": -2.4939444065093994, "step": 12990 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -3.0057215690612793, "logits/rejected": -3.0364060401916504, "logps/chosen": -3.7321648597717285, "logps/rejected": -289.22723388671875, "loss": 0.09, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2776919901371002, "rewards/margins": 2.8172342777252197, "rewards/rejected": -2.5395424365997314, "step": 13000 }, { "epoch": 0.52, "eval_logits/chosen": -3.0733325481414795, "eval_logits/rejected": -3.0991783142089844, "eval_logps/chosen": -0.13608092069625854, "eval_logps/rejected": -284.55572509765625, "eval_loss": 0.06005164235830307, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31523871421813965, "eval_rewards/margins": 2.80064058303833, "eval_rewards/rejected": -2.4854016304016113, "eval_runtime": 2.5307, "eval_samples_per_second": 1.976, "eval_steps_per_second": 0.395, "step": 13000 }, { "epoch": 0.52, "learning_rate": 2.7578493682561686e-06, "logits/chosen": -3.0443077087402344, "logits/rejected": -3.071451187133789, "logps/chosen": -6.853339195251465, "logps/rejected": -278.0425720214844, "loss": 0.1324, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2513279616832733, "rewards/margins": 2.6809630393981934, "rewards/rejected": -2.4296352863311768, "step": 13010 }, { "epoch": 0.52, "learning_rate": 2.754377075652666e-06, "logits/chosen": -3.0182106494903564, "logits/rejected": -3.048757553100586, "logps/chosen": -0.1574898213148117, "logps/rejected": -287.3952941894531, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": 0.3155094087123871, "rewards/margins": 2.8338370323181152, "rewards/rejected": -2.5183277130126953, "step": 13020 }, { "epoch": 0.52, "learning_rate": 2.7509042871280373e-06, "logits/chosen": -3.0063424110412598, "logits/rejected": -3.0377447605133057, "logps/chosen": -0.14548811316490173, "logps/rejected": -288.63775634765625, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.31375831365585327, "rewards/margins": 2.8481764793395996, "rewards/rejected": -2.5344181060791016, "step": 13030 }, { "epoch": 0.52, "learning_rate": 2.7474310094526628e-06, "logits/chosen": -3.015378475189209, "logits/rejected": -3.0459580421447754, "logps/chosen": -0.17004820704460144, "logps/rejected": -290.9087219238281, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.3158794343471527, "rewards/margins": 2.8689112663269043, "rewards/rejected": -2.5530319213867188, "step": 13040 }, { "epoch": 0.52, "learning_rate": 2.743957249397874e-06, "logits/chosen": -3.0330848693847656, "logits/rejected": -3.060633659362793, "logps/chosen": -0.1995472013950348, "logps/rejected": -293.0176696777344, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": 0.31446564197540283, "rewards/margins": 2.8902463912963867, "rewards/rejected": -2.5757811069488525, "step": 13050 }, { "epoch": 0.52, "learning_rate": 2.7404830137359445e-06, "logits/chosen": -3.0278406143188477, "logits/rejected": -3.0580453872680664, "logps/chosen": -0.2068222016096115, "logps/rejected": -291.8015441894531, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149072229862213, "rewards/margins": 2.878850221633911, "rewards/rejected": -2.5639431476593018, "step": 13060 }, { "epoch": 0.52, "learning_rate": 2.7370083092400735e-06, "logits/chosen": -3.04072642326355, "logits/rejected": -3.0696375370025635, "logps/chosen": -2.602450132369995, "logps/rejected": -289.65594482421875, "loss": 0.0731, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29164883494377136, "rewards/margins": 2.832439422607422, "rewards/rejected": -2.540790319442749, "step": 13070 }, { "epoch": 0.52, "learning_rate": 2.733533142684377e-06, "logits/chosen": -3.0175013542175293, "logits/rejected": -3.047792434692383, "logps/chosen": -0.17533624172210693, "logps/rejected": -292.8375244140625, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": 0.31435057520866394, "rewards/margins": 2.8906350135803223, "rewards/rejected": -2.576284408569336, "step": 13080 }, { "epoch": 0.52, "learning_rate": 2.7300575208438684e-06, "logits/chosen": -3.032111406326294, "logits/rejected": -3.0621931552886963, "logps/chosen": -0.1674826741218567, "logps/rejected": -289.6202087402344, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": 0.314114511013031, "rewards/margins": 2.852330446243286, "rewards/rejected": -2.5382156372070312, "step": 13090 }, { "epoch": 0.52, "learning_rate": 2.726581450494451e-06, "logits/chosen": -3.02205228805542, "logits/rejected": -3.0520615577697754, "logps/chosen": -0.1759428232908249, "logps/rejected": -291.7349548339844, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.3135746717453003, "rewards/margins": 2.8782572746276855, "rewards/rejected": -2.564683198928833, "step": 13100 }, { "epoch": 0.52, "eval_logits/chosen": -3.072531223297119, "eval_logits/rejected": -3.0974786281585693, "eval_logps/chosen": -0.19209781289100647, "eval_logps/rejected": -284.66668701171875, "eval_loss": 0.06009731441736221, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3146785497665405, "eval_rewards/margins": 2.80118989944458, "eval_rewards/rejected": -2.48651123046875, "eval_runtime": 2.5357, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 13100 }, { "epoch": 0.52, "learning_rate": 2.7231049384129016e-06, "logits/chosen": -3.0060489177703857, "logits/rejected": -3.034738063812256, "logps/chosen": -6.665940284729004, "logps/rejected": -283.0694885253906, "loss": 0.121, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2503082752227783, "rewards/margins": 2.7265830039978027, "rewards/rejected": -2.4762744903564453, "step": 13110 }, { "epoch": 0.52, "learning_rate": 2.7196279913768587e-06, "logits/chosen": -3.037135362625122, "logits/rejected": -3.06610369682312, "logps/chosen": -5.788487911224365, "logps/rejected": -282.51556396484375, "loss": 0.1078, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26054710149765015, "rewards/margins": 2.7292656898498535, "rewards/rejected": -2.4687187671661377, "step": 13120 }, { "epoch": 0.53, "learning_rate": 2.7161506161648076e-06, "logits/chosen": -3.0217156410217285, "logits/rejected": -3.0490658283233643, "logps/chosen": -3.2654120922088623, "logps/rejected": -289.09320068359375, "loss": 0.0855, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28574898838996887, "rewards/margins": 2.8201394081115723, "rewards/rejected": -2.534390687942505, "step": 13130 }, { "epoch": 0.53, "learning_rate": 2.7126728195560704e-06, "logits/chosen": -3.0276904106140137, "logits/rejected": -3.053079843521118, "logps/chosen": -7.475266456604004, "logps/rejected": -286.2763671875, "loss": 0.1265, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24101920425891876, "rewards/margins": 2.752323865890503, "rewards/rejected": -2.5113043785095215, "step": 13140 }, { "epoch": 0.53, "learning_rate": 2.70919460833079e-06, "logits/chosen": -3.0522255897521973, "logits/rejected": -3.0796802043914795, "logps/chosen": -3.169665813446045, "logps/rejected": -287.94573974609375, "loss": 0.0849, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2854539752006531, "rewards/margins": 2.8145511150360107, "rewards/rejected": -2.529097318649292, "step": 13150 }, { "epoch": 0.53, "learning_rate": 2.7057159892699143e-06, "logits/chosen": -3.017099142074585, "logits/rejected": -3.0459938049316406, "logps/chosen": -0.21677878499031067, "logps/rejected": -289.79034423828125, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.3156869113445282, "rewards/margins": 2.8554940223693848, "rewards/rejected": -2.5398073196411133, "step": 13160 }, { "epoch": 0.53, "learning_rate": 2.702236969155192e-06, "logits/chosen": -3.0377867221832275, "logits/rejected": -3.067887544631958, "logps/chosen": -0.17840750515460968, "logps/rejected": -293.1044006347656, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.3170669674873352, "rewards/margins": 2.887674570083618, "rewards/rejected": -2.5706076622009277, "step": 13170 }, { "epoch": 0.53, "learning_rate": 2.69875755476915e-06, "logits/chosen": -3.025160312652588, "logits/rejected": -3.0534749031066895, "logps/chosen": -6.89141321182251, "logps/rejected": -281.7828674316406, "loss": 0.1243, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24613723158836365, "rewards/margins": 2.7100255489349365, "rewards/rejected": -2.46388840675354, "step": 13180 }, { "epoch": 0.53, "learning_rate": 2.695277752895084e-06, "logits/chosen": -3.0401580333709717, "logits/rejected": -3.066997766494751, "logps/chosen": -0.21833613514900208, "logps/rejected": -288.99652099609375, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.31485065817832947, "rewards/margins": 2.8520350456237793, "rewards/rejected": -2.537184238433838, "step": 13190 }, { "epoch": 0.53, "learning_rate": 2.6917975703170466e-06, "logits/chosen": -3.0157065391540527, "logits/rejected": -3.0458881855010986, "logps/chosen": -3.887434720993042, "logps/rejected": -285.6722106933594, "loss": 0.0937, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27792900800704956, "rewards/margins": 2.781573534011841, "rewards/rejected": -2.5036447048187256, "step": 13200 }, { "epoch": 0.53, "eval_logits/chosen": -3.0705983638763428, "eval_logits/rejected": -3.0955677032470703, "eval_logps/chosen": -0.17290307581424713, "eval_logps/rejected": -283.64727783203125, "eval_loss": 0.060839373618364334, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148705065250397, "eval_rewards/margins": 2.7911880016326904, "eval_rewards/rejected": -2.4763174057006836, "eval_runtime": 2.5386, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 13200 }, { "epoch": 0.53, "learning_rate": 2.688317013819832e-06, "logits/chosen": -3.029087543487549, "logits/rejected": -3.057743549346924, "logps/chosen": -10.50973892211914, "logps/rejected": -280.33660888671875, "loss": 0.1493, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2101515531539917, "rewards/margins": 2.6583991050720215, "rewards/rejected": -2.4482476711273193, "step": 13210 }, { "epoch": 0.53, "learning_rate": 2.6848360901889633e-06, "logits/chosen": -3.0446994304656982, "logits/rejected": -3.0742411613464355, "logps/chosen": -0.47048360109329224, "logps/rejected": -293.1072082519531, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.31343144178390503, "rewards/margins": 2.891070604324341, "rewards/rejected": -2.577639102935791, "step": 13220 }, { "epoch": 0.53, "learning_rate": 2.6813548062106775e-06, "logits/chosen": -3.019059658050537, "logits/rejected": -3.048372983932495, "logps/chosen": -1.741720199584961, "logps/rejected": -284.07000732421875, "loss": 0.0744, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29801231622695923, "rewards/margins": 2.7842042446136475, "rewards/rejected": -2.486192226409912, "step": 13230 }, { "epoch": 0.53, "learning_rate": 2.6778731686719177e-06, "logits/chosen": -3.013791799545288, "logits/rejected": -3.044196605682373, "logps/chosen": -0.18605858087539673, "logps/rejected": -291.30279541015625, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3136883080005646, "rewards/margins": 2.8759522438049316, "rewards/rejected": -2.5622634887695312, "step": 13240 }, { "epoch": 0.53, "learning_rate": 2.6743911843603134e-06, "logits/chosen": -3.039956569671631, "logits/rejected": -3.068671941757202, "logps/chosen": -0.21991929411888123, "logps/rejected": -288.67169189453125, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 0.31432044506073, "rewards/margins": 2.8441805839538574, "rewards/rejected": -2.529860258102417, "step": 13250 }, { "epoch": 0.53, "learning_rate": 2.670908860064172e-06, "logits/chosen": -3.0276174545288086, "logits/rejected": -3.0568320751190186, "logps/chosen": -0.16159436106681824, "logps/rejected": -292.61260986328125, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.3181222081184387, "rewards/margins": 2.882629156112671, "rewards/rejected": -2.564507007598877, "step": 13260 }, { "epoch": 0.53, "learning_rate": 2.667426202572463e-06, "logits/chosen": -3.0249133110046387, "logits/rejected": -3.050947427749634, "logps/chosen": -7.508017063140869, "logps/rejected": -282.3577575683594, "loss": 0.126, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2416389286518097, "rewards/margins": 2.711395025253296, "rewards/rejected": -2.4697563648223877, "step": 13270 }, { "epoch": 0.53, "learning_rate": 2.6639432186748044e-06, "logits/chosen": -3.0057642459869385, "logits/rejected": -3.034975528717041, "logps/chosen": -0.17630648612976074, "logps/rejected": -292.3478088378906, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.3177596926689148, "rewards/margins": 2.8847742080688477, "rewards/rejected": -2.567014455795288, "step": 13280 }, { "epoch": 0.53, "learning_rate": 2.6604599151614514e-06, "logits/chosen": -2.996924638748169, "logits/rejected": -3.0286076068878174, "logps/chosen": -2.4797425270080566, "logps/rejected": -288.58697509765625, "loss": 0.0785, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2894488275051117, "rewards/margins": 2.8225696086883545, "rewards/rejected": -2.533121109008789, "step": 13290 }, { "epoch": 0.53, "learning_rate": 2.6569762988232838e-06, "logits/chosen": -3.0527772903442383, "logits/rejected": -3.0811944007873535, "logps/chosen": -1.1357566118240356, "logps/rejected": -293.86138916015625, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.3063157796859741, "rewards/margins": 2.892972707748413, "rewards/rejected": -2.5866570472717285, "step": 13300 }, { "epoch": 0.53, "eval_logits/chosen": -3.0705206394195557, "eval_logits/rejected": -3.096203088760376, "eval_logps/chosen": -0.1763598471879959, "eval_logps/rejected": -284.2759704589844, "eval_loss": 0.06036185100674629, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31483593583106995, "eval_rewards/margins": 2.7974398136138916, "eval_rewards/rejected": -2.4826037883758545, "eval_runtime": 2.5348, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 13300 }, { "epoch": 0.53, "learning_rate": 2.653492376451789e-06, "logits/chosen": -3.0303988456726074, "logits/rejected": -3.057791233062744, "logps/chosen": -3.3307337760925293, "logps/rejected": -287.2489929199219, "loss": 0.0865, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28177183866500854, "rewards/margins": 2.806020736694336, "rewards/rejected": -2.5242486000061035, "step": 13310 }, { "epoch": 0.53, "learning_rate": 2.650008154839052e-06, "logits/chosen": -3.0060875415802, "logits/rejected": -3.036898374557495, "logps/chosen": -0.22058269381523132, "logps/rejected": -288.3975524902344, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.3156472444534302, "rewards/margins": 2.839465618133545, "rewards/rejected": -2.5238184928894043, "step": 13320 }, { "epoch": 0.53, "learning_rate": 2.646523640777741e-06, "logits/chosen": -3.0198497772216797, "logits/rejected": -3.0492169857025146, "logps/chosen": -3.803232192993164, "logps/rejected": -287.8863220214844, "loss": 0.0912, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2772195339202881, "rewards/margins": 2.8061811923980713, "rewards/rejected": -2.528961658477783, "step": 13330 }, { "epoch": 0.53, "learning_rate": 2.6430388410610958e-06, "logits/chosen": -3.0152018070220947, "logits/rejected": -3.046001434326172, "logps/chosen": -0.5081229209899902, "logps/rejected": -291.0755615234375, "loss": 0.0572, "rewards/accuracies": 1.0, "rewards/chosen": 0.3104220926761627, "rewards/margins": 2.8678975105285645, "rewards/rejected": -2.5574750900268555, "step": 13340 }, { "epoch": 0.53, "learning_rate": 2.63955376248291e-06, "logits/chosen": -3.0079879760742188, "logits/rejected": -3.037623882293701, "logps/chosen": -3.1286633014678955, "logps/rejected": -286.1094665527344, "loss": 0.0862, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28735512495040894, "rewards/margins": 2.7915544509887695, "rewards/rejected": -2.504199266433716, "step": 13350 }, { "epoch": 0.53, "learning_rate": 2.636068411837523e-06, "logits/chosen": -2.9993720054626465, "logits/rejected": -3.028691530227661, "logps/chosen": -9.49960994720459, "logps/rejected": -282.1632385253906, "loss": 0.1435, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22206895053386688, "rewards/margins": 2.693223714828491, "rewards/rejected": -2.4711546897888184, "step": 13360 }, { "epoch": 0.53, "learning_rate": 2.632582795919805e-06, "logits/chosen": -3.030158519744873, "logits/rejected": -3.0594472885131836, "logps/chosen": -3.485532760620117, "logps/rejected": -286.707763671875, "loss": 0.0889, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2811293303966522, "rewards/margins": 2.7939846515655518, "rewards/rejected": -2.5128555297851562, "step": 13370 }, { "epoch": 0.54, "learning_rate": 2.6290969215251415e-06, "logits/chosen": -3.05481219291687, "logits/rejected": -3.0821125507354736, "logps/chosen": -2.835400342941284, "logps/rejected": -284.26690673828125, "loss": 0.0849, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28854578733444214, "rewards/margins": 2.7725937366485596, "rewards/rejected": -2.4840476512908936, "step": 13380 }, { "epoch": 0.54, "learning_rate": 2.625610795449424e-06, "logits/chosen": -3.019387722015381, "logits/rejected": -3.050309658050537, "logps/chosen": -0.5262137055397034, "logps/rejected": -288.316650390625, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": 0.3131789565086365, "rewards/margins": 2.834638833999634, "rewards/rejected": -2.5214600563049316, "step": 13390 }, { "epoch": 0.54, "learning_rate": 2.6221244244890336e-06, "logits/chosen": -3.027817487716675, "logits/rejected": -3.057321071624756, "logps/chosen": -0.19995670020580292, "logps/rejected": -292.1136474609375, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.3154405951499939, "rewards/margins": 2.880134105682373, "rewards/rejected": -2.5646934509277344, "step": 13400 }, { "epoch": 0.54, "eval_logits/chosen": -3.0708231925964355, "eval_logits/rejected": -3.0964884757995605, "eval_logps/chosen": -0.13990870118141174, "eval_logps/rejected": -284.6144714355469, "eval_loss": 0.06008122116327286, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31520041823387146, "eval_rewards/margins": 2.801189422607422, "eval_rewards/rejected": -2.4859888553619385, "eval_runtime": 2.5377, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 13400 }, { "epoch": 0.54, "learning_rate": 2.618637815440829e-06, "logits/chosen": -3.028247594833374, "logits/rejected": -3.056364059448242, "logps/chosen": -3.349883556365967, "logps/rejected": -285.78509521484375, "loss": 0.0871, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28206881880760193, "rewards/margins": 2.790992259979248, "rewards/rejected": -2.5089235305786133, "step": 13410 }, { "epoch": 0.54, "learning_rate": 2.6151509751021307e-06, "logits/chosen": -3.034379005432129, "logits/rejected": -3.0640676021575928, "logps/chosen": -0.21749785542488098, "logps/rejected": -289.4878845214844, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.3163222670555115, "rewards/margins": 2.8533284664154053, "rewards/rejected": -2.53700590133667, "step": 13420 }, { "epoch": 0.54, "learning_rate": 2.611663910270716e-06, "logits/chosen": -3.018906593322754, "logits/rejected": -3.046599864959717, "logps/chosen": -3.847057342529297, "logps/rejected": -287.57635498046875, "loss": 0.0918, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2798554301261902, "rewards/margins": 2.797077178955078, "rewards/rejected": -2.5172219276428223, "step": 13430 }, { "epoch": 0.54, "learning_rate": 2.608176627744793e-06, "logits/chosen": -3.0206124782562256, "logits/rejected": -3.049084186553955, "logps/chosen": -4.988816738128662, "logps/rejected": -286.55755615234375, "loss": 0.0903, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2674562335014343, "rewards/margins": 2.778184413909912, "rewards/rejected": -2.510728359222412, "step": 13440 }, { "epoch": 0.54, "learning_rate": 2.604689134322999e-06, "logits/chosen": -3.034566640853882, "logits/rejected": -3.06264066696167, "logps/chosen": -3.769468307495117, "logps/rejected": -287.91912841796875, "loss": 0.0904, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27820536494255066, "rewards/margins": 2.807020664215088, "rewards/rejected": -2.528815507888794, "step": 13450 }, { "epoch": 0.54, "learning_rate": 2.6012014368043813e-06, "logits/chosen": -3.027036190032959, "logits/rejected": -3.0535995960235596, "logps/chosen": -2.708519697189331, "logps/rejected": -285.9801025390625, "loss": 0.0815, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2900027632713318, "rewards/margins": 2.790884017944336, "rewards/rejected": -2.5008811950683594, "step": 13460 }, { "epoch": 0.54, "learning_rate": 2.597713541988384e-06, "logits/chosen": -3.0019702911376953, "logits/rejected": -3.032134532928467, "logps/chosen": -3.4635391235351562, "logps/rejected": -284.699951171875, "loss": 0.0902, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.282231867313385, "rewards/margins": 2.7743828296661377, "rewards/rejected": -2.4921507835388184, "step": 13470 }, { "epoch": 0.54, "learning_rate": 2.594225456674837e-06, "logits/chosen": -3.0102791786193848, "logits/rejected": -3.0388646125793457, "logps/chosen": -3.2785656452178955, "logps/rejected": -287.456787109375, "loss": 0.0856, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2850191295146942, "rewards/margins": 2.8052401542663574, "rewards/rejected": -2.520220994949341, "step": 13480 }, { "epoch": 0.54, "learning_rate": 2.59073718766394e-06, "logits/chosen": -3.025056838989258, "logits/rejected": -3.0531246662139893, "logps/chosen": -4.032431602478027, "logps/rejected": -284.66253662109375, "loss": 0.0938, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27617907524108887, "rewards/margins": 2.7699484825134277, "rewards/rejected": -2.493769645690918, "step": 13490 }, { "epoch": 0.54, "learning_rate": 2.587248741756253e-06, "logits/chosen": -3.0011162757873535, "logits/rejected": -3.030465602874756, "logps/chosen": -0.3386940360069275, "logps/rejected": -289.4720764160156, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.3090149462223053, "rewards/margins": 2.8553788661956787, "rewards/rejected": -2.5463640689849854, "step": 13500 }, { "epoch": 0.54, "eval_logits/chosen": -3.0694282054901123, "eval_logits/rejected": -3.0939488410949707, "eval_logps/chosen": -0.1809438019990921, "eval_logps/rejected": -284.23052978515625, "eval_loss": 0.060433875769376755, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147900700569153, "eval_rewards/margins": 2.7969396114349365, "eval_rewards/rejected": -2.482149600982666, "eval_runtime": 2.5398, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 13500 }, { "epoch": 0.54, "learning_rate": 2.583760125752679e-06, "logits/chosen": -3.0507616996765137, "logits/rejected": -3.07710862159729, "logps/chosen": -4.765551567077637, "logps/rejected": -287.7153625488281, "loss": 0.0943, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.272561252117157, "rewards/margins": 2.7923989295959473, "rewards/rejected": -2.5198378562927246, "step": 13510 }, { "epoch": 0.54, "learning_rate": 2.5802713464544545e-06, "logits/chosen": -3.0365777015686035, "logits/rejected": -3.065802812576294, "logps/chosen": -3.840104341506958, "logps/rejected": -288.84600830078125, "loss": 0.091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27897509932518005, "rewards/margins": 2.812458038330078, "rewards/rejected": -2.533482789993286, "step": 13520 }, { "epoch": 0.54, "learning_rate": 2.5767824106631323e-06, "logits/chosen": -3.0326883792877197, "logits/rejected": -3.0598785877227783, "logps/chosen": -5.989420413970947, "logps/rejected": -284.5433349609375, "loss": 0.1132, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26041269302368164, "rewards/margins": 2.7498631477355957, "rewards/rejected": -2.489450693130493, "step": 13530 }, { "epoch": 0.54, "learning_rate": 2.5732933251805716e-06, "logits/chosen": -3.050318479537964, "logits/rejected": -3.079266309738159, "logps/chosen": -3.200241804122925, "logps/rejected": -284.1542053222656, "loss": 0.0866, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28401046991348267, "rewards/margins": 2.774585485458374, "rewards/rejected": -2.4905753135681152, "step": 13540 }, { "epoch": 0.54, "learning_rate": 2.569804096808923e-06, "logits/chosen": -3.0039401054382324, "logits/rejected": -3.033626079559326, "logps/chosen": -10.523519515991211, "logps/rejected": -279.56219482421875, "loss": 0.1592, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21031300723552704, "rewards/margins": 2.6557297706604004, "rewards/rejected": -2.4454166889190674, "step": 13550 }, { "epoch": 0.54, "learning_rate": 2.566314732350615e-06, "logits/chosen": -3.002206325531006, "logits/rejected": -3.032980442047119, "logps/chosen": -0.16737417876720428, "logps/rejected": -290.9193115234375, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3131691813468933, "rewards/margins": 2.869746446609497, "rewards/rejected": -2.55657696723938, "step": 13560 }, { "epoch": 0.54, "learning_rate": 2.5628252386083443e-06, "logits/chosen": -3.02036714553833, "logits/rejected": -3.0505433082580566, "logps/chosen": -0.20287184417247772, "logps/rejected": -293.10491943359375, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": 0.3159864544868469, "rewards/margins": 2.8937175273895264, "rewards/rejected": -2.577730894088745, "step": 13570 }, { "epoch": 0.54, "learning_rate": 2.5593356223850553e-06, "logits/chosen": -3.0231072902679443, "logits/rejected": -3.0509862899780273, "logps/chosen": -10.102815628051758, "logps/rejected": -279.3477478027344, "loss": 0.156, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21645942330360413, "rewards/margins": 2.6511871814727783, "rewards/rejected": -2.434727668762207, "step": 13580 }, { "epoch": 0.54, "learning_rate": 2.5558458904839345e-06, "logits/chosen": -2.9948034286499023, "logits/rejected": -3.0278801918029785, "logps/chosen": -0.17907395958900452, "logps/rejected": -291.675537109375, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.3146246075630188, "rewards/margins": 2.8761792182922363, "rewards/rejected": -2.5615546703338623, "step": 13590 }, { "epoch": 0.54, "learning_rate": 2.5523560497083927e-06, "logits/chosen": -3.0269477367401123, "logits/rejected": -3.0524399280548096, "logps/chosen": -17.144798278808594, "logps/rejected": -271.0459899902344, "loss": 0.2197, "rewards/accuracies": 0.9375, "rewards/chosen": 0.14779651165008545, "rewards/margins": 2.502808094024658, "rewards/rejected": -2.3550117015838623, "step": 13600 }, { "epoch": 0.54, "eval_logits/chosen": -3.0720534324645996, "eval_logits/rejected": -3.0984554290771484, "eval_logps/chosen": -0.13555391132831573, "eval_logps/rejected": -284.4350280761719, "eval_loss": 0.060211729258298874, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31524398922920227, "eval_rewards/margins": 2.799438714981079, "eval_rewards/rejected": -2.48419451713562, "eval_runtime": 2.5392, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 13600 }, { "epoch": 0.54, "learning_rate": 2.5488661068620533e-06, "logits/chosen": -3.02103853225708, "logits/rejected": -3.051546096801758, "logps/chosen": -3.856445789337158, "logps/rejected": -287.40985107421875, "loss": 0.0917, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2804757356643677, "rewards/margins": 2.799393892288208, "rewards/rejected": -2.518918037414551, "step": 13610 }, { "epoch": 0.54, "learning_rate": 2.545376068748737e-06, "logits/chosen": -3.024657726287842, "logits/rejected": -3.053795099258423, "logps/chosen": -0.1490289568901062, "logps/rejected": -291.5953674316406, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3177209794521332, "rewards/margins": 2.87221097946167, "rewards/rejected": -2.554489850997925, "step": 13620 }, { "epoch": 0.55, "learning_rate": 2.541885942172454e-06, "logits/chosen": -3.0324172973632812, "logits/rejected": -3.0616345405578613, "logps/chosen": -3.755321979522705, "logps/rejected": -285.6149597167969, "loss": 0.0928, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2778596580028534, "rewards/margins": 2.7789435386657715, "rewards/rejected": -2.5010838508605957, "step": 13630 }, { "epoch": 0.55, "learning_rate": 2.5383957339373828e-06, "logits/chosen": -3.03902530670166, "logits/rejected": -3.0696587562561035, "logps/chosen": -0.588028073310852, "logps/rejected": -286.7392272949219, "loss": 0.0651, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3121492266654968, "rewards/margins": 2.8215835094451904, "rewards/rejected": -2.509434461593628, "step": 13640 }, { "epoch": 0.55, "learning_rate": 2.5349054508478636e-06, "logits/chosen": -3.05220103263855, "logits/rejected": -3.0781548023223877, "logps/chosen": -0.16234758496284485, "logps/rejected": -290.54937744140625, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.3177935779094696, "rewards/margins": 2.8677079677581787, "rewards/rejected": -2.5499141216278076, "step": 13650 }, { "epoch": 0.55, "learning_rate": 2.531415099708382e-06, "logits/chosen": -3.0046212673187256, "logits/rejected": -3.0343503952026367, "logps/chosen": -0.14874643087387085, "logps/rejected": -289.85272216796875, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.31468480825424194, "rewards/margins": 2.8584370613098145, "rewards/rejected": -2.5437517166137695, "step": 13660 }, { "epoch": 0.55, "learning_rate": 2.527924687323556e-06, "logits/chosen": -2.9966959953308105, "logits/rejected": -3.029146432876587, "logps/chosen": -0.19954541325569153, "logps/rejected": -289.06085205078125, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.3151703178882599, "rewards/margins": 2.852616786956787, "rewards/rejected": -2.5374464988708496, "step": 13670 }, { "epoch": 0.55, "learning_rate": 2.524434220498123e-06, "logits/chosen": -3.0083110332489014, "logits/rejected": -3.0369391441345215, "logps/chosen": -2.937852621078491, "logps/rejected": -285.2626037597656, "loss": 0.0839, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.287721186876297, "rewards/margins": 2.785172700881958, "rewards/rejected": -2.4974513053894043, "step": 13680 }, { "epoch": 0.55, "learning_rate": 2.5209437060369266e-06, "logits/chosen": -3.018719434738159, "logits/rejected": -3.0469672679901123, "logps/chosen": -3.878157138824463, "logps/rejected": -288.0392761230469, "loss": 0.0916, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2776643633842468, "rewards/margins": 2.806431770324707, "rewards/rejected": -2.5287675857543945, "step": 13690 }, { "epoch": 0.55, "learning_rate": 2.517453150744904e-06, "logits/chosen": -3.0227558612823486, "logits/rejected": -3.051190137863159, "logps/chosen": -3.4484763145446777, "logps/rejected": -288.08319091796875, "loss": 0.0871, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2833738625049591, "rewards/margins": 2.8093597888946533, "rewards/rejected": -2.5259859561920166, "step": 13700 }, { "epoch": 0.55, "eval_logits/chosen": -3.071653127670288, "eval_logits/rejected": -3.0978915691375732, "eval_logps/chosen": -0.14559724926948547, "eval_logps/rejected": -284.5201110839844, "eval_loss": 0.06014835834503174, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31514355540275574, "eval_rewards/margins": 2.8001887798309326, "eval_rewards/rejected": -2.4850454330444336, "eval_runtime": 2.5384, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 13700 }, { "epoch": 0.55, "learning_rate": 2.5139625614270706e-06, "logits/chosen": -3.021596908569336, "logits/rejected": -3.050487756729126, "logps/chosen": -5.834027290344238, "logps/rejected": -281.5677185058594, "loss": 0.1124, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.258407324552536, "rewards/margins": 2.7195534706115723, "rewards/rejected": -2.4611458778381348, "step": 13710 }, { "epoch": 0.55, "learning_rate": 2.5104719448885103e-06, "logits/chosen": -3.036459445953369, "logits/rejected": -3.0663864612579346, "logps/chosen": -3.5676944255828857, "logps/rejected": -284.7732849121094, "loss": 0.0912, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.279166042804718, "rewards/margins": 2.77518892288208, "rewards/rejected": -2.4960227012634277, "step": 13720 }, { "epoch": 0.55, "learning_rate": 2.506981307934357e-06, "logits/chosen": -2.9970059394836426, "logits/rejected": -3.0260841846466064, "logps/chosen": -5.825485706329346, "logps/rejected": -282.26910400390625, "loss": 0.1109, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25749656558036804, "rewards/margins": 2.7290892601013184, "rewards/rejected": -2.471592426300049, "step": 13730 }, { "epoch": 0.55, "learning_rate": 2.5034906573697863e-06, "logits/chosen": -3.0361294746398926, "logits/rejected": -3.0655150413513184, "logps/chosen": -2.7467448711395264, "logps/rejected": -284.73431396484375, "loss": 0.0838, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28819283843040466, "rewards/margins": 2.7796571254730225, "rewards/rejected": -2.491464138031006, "step": 13740 }, { "epoch": 0.55, "learning_rate": 2.5e-06, "logits/chosen": -3.0202159881591797, "logits/rejected": -3.049201011657715, "logps/chosen": -3.305776596069336, "logps/rejected": -286.6091613769531, "loss": 0.0869, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28502053022384644, "rewards/margins": 2.7960097789764404, "rewards/rejected": -2.5109894275665283, "step": 13750 }, { "epoch": 0.55, "learning_rate": 2.496509342630214e-06, "logits/chosen": -3.0288920402526855, "logits/rejected": -3.058591604232788, "logps/chosen": -3.341651201248169, "logps/rejected": -286.841796875, "loss": 0.0736, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28173282742500305, "rewards/margins": 2.8005287647247314, "rewards/rejected": -2.518795967102051, "step": 13760 }, { "epoch": 0.55, "learning_rate": 2.493018692065644e-06, "logits/chosen": -3.014403820037842, "logits/rejected": -3.0437235832214355, "logps/chosen": -5.465664386749268, "logps/rejected": -284.15716552734375, "loss": 0.1083, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25989338755607605, "rewards/margins": 2.749290943145752, "rewards/rejected": -2.4893975257873535, "step": 13770 }, { "epoch": 0.55, "learning_rate": 2.489528055111491e-06, "logits/chosen": -3.006603479385376, "logits/rejected": -3.037536859512329, "logps/chosen": -0.1780819594860077, "logps/rejected": -291.91351318359375, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.315077543258667, "rewards/margins": 2.8773820400238037, "rewards/rejected": -2.5623044967651367, "step": 13780 }, { "epoch": 0.55, "learning_rate": 2.4860374385729298e-06, "logits/chosen": -3.0210375785827637, "logits/rejected": -3.046579122543335, "logps/chosen": -10.95506477355957, "logps/rejected": -276.9764099121094, "loss": 0.168, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.20925593376159668, "rewards/margins": 2.6243882179260254, "rewards/rejected": -2.4151322841644287, "step": 13790 }, { "epoch": 0.55, "learning_rate": 2.482546849255096e-06, "logits/chosen": -3.008763074874878, "logits/rejected": -3.039511203765869, "logps/chosen": -5.841108798980713, "logps/rejected": -283.20147705078125, "loss": 0.1127, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2582980692386627, "rewards/margins": 2.7368664741516113, "rewards/rejected": -2.4785685539245605, "step": 13800 }, { "epoch": 0.55, "eval_logits/chosen": -3.07092547416687, "eval_logits/rejected": -3.0967912673950195, "eval_logps/chosen": -0.14743879437446594, "eval_logps/rejected": -283.7218933105469, "eval_loss": 0.060837019234895706, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31512513756752014, "eval_rewards/margins": 2.7921884059906006, "eval_rewards/rejected": -2.4770634174346924, "eval_runtime": 2.5412, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 13800 }, { "epoch": 0.55, "learning_rate": 2.4790562939630738e-06, "logits/chosen": -3.008741855621338, "logits/rejected": -3.039854049682617, "logps/chosen": -1.703279733657837, "logps/rejected": -285.10931396484375, "loss": 0.0742, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2982308268547058, "rewards/margins": 2.7984299659729004, "rewards/rejected": -2.500199317932129, "step": 13810 }, { "epoch": 0.55, "learning_rate": 2.475565779501878e-06, "logits/chosen": -3.010131359100342, "logits/rejected": -3.0404510498046875, "logps/chosen": -2.848438024520874, "logps/rejected": -287.2164611816406, "loss": 0.0817, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28654199838638306, "rewards/margins": 2.8069865703582764, "rewards/rejected": -2.520444393157959, "step": 13820 }, { "epoch": 0.55, "learning_rate": 2.472075312676445e-06, "logits/chosen": -3.021087408065796, "logits/rejected": -3.050370216369629, "logps/chosen": -7.193057060241699, "logps/rejected": -283.3706359863281, "loss": 0.1262, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24450984597206116, "rewards/margins": 2.726598024368286, "rewards/rejected": -2.482088088989258, "step": 13830 }, { "epoch": 0.55, "learning_rate": 2.4685849002916184e-06, "logits/chosen": -3.0203018188476562, "logits/rejected": -3.0494627952575684, "logps/chosen": -0.16134077310562134, "logps/rejected": -289.4217224121094, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": 0.31575122475624084, "rewards/margins": 2.8545727729797363, "rewards/rejected": -2.5388216972351074, "step": 13840 }, { "epoch": 0.55, "learning_rate": 2.4650945491521372e-06, "logits/chosen": -3.0226399898529053, "logits/rejected": -3.0532660484313965, "logps/chosen": -0.17880259454250336, "logps/rejected": -288.1736145019531, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.31514838337898254, "rewards/margins": 2.842736005783081, "rewards/rejected": -2.527587413787842, "step": 13850 }, { "epoch": 0.55, "learning_rate": 2.4616042660626176e-06, "logits/chosen": -2.989835739135742, "logits/rejected": -3.0202982425689697, "logps/chosen": -3.7739906311035156, "logps/rejected": -288.28289794921875, "loss": 0.0909, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2806169390678406, "rewards/margins": 2.80543851852417, "rewards/rejected": -2.5248210430145264, "step": 13860 }, { "epoch": 0.55, "learning_rate": 2.4581140578275473e-06, "logits/chosen": -3.0382862091064453, "logits/rejected": -3.064601421356201, "logps/chosen": -10.123726844787598, "logps/rejected": -281.3382263183594, "loss": 0.1548, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21510395407676697, "rewards/margins": 2.6743574142456055, "rewards/rejected": -2.4592535495758057, "step": 13870 }, { "epoch": 0.56, "learning_rate": 2.4546239312512633e-06, "logits/chosen": -3.01487398147583, "logits/rejected": -3.042100667953491, "logps/chosen": -9.892324447631836, "logps/rejected": -281.28497314453125, "loss": 0.1519, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2185075283050537, "rewards/margins": 2.6753134727478027, "rewards/rejected": -2.45680570602417, "step": 13880 }, { "epoch": 0.56, "learning_rate": 2.4511338931379475e-06, "logits/chosen": -3.0067858695983887, "logits/rejected": -3.0329785346984863, "logps/chosen": -13.896360397338867, "logps/rejected": -277.14849853515625, "loss": 0.1907, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1783135086297989, "rewards/margins": 2.597029209136963, "rewards/rejected": -2.418715238571167, "step": 13890 }, { "epoch": 0.56, "learning_rate": 2.447643950291608e-06, "logits/chosen": -3.020913600921631, "logits/rejected": -3.0490634441375732, "logps/chosen": -3.2613754272460938, "logps/rejected": -288.5361022949219, "loss": 0.0862, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2829523980617523, "rewards/margins": 2.8134100437164307, "rewards/rejected": -2.5304572582244873, "step": 13900 }, { "epoch": 0.56, "eval_logits/chosen": -3.072619676589966, "eval_logits/rejected": -3.098168134689331, "eval_logps/chosen": -0.15801551938056946, "eval_logps/rejected": -283.5950927734375, "eval_loss": 0.06087179109454155, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150193691253662, "eval_rewards/margins": 2.7908146381378174, "eval_rewards/rejected": -2.475795269012451, "eval_runtime": 2.5397, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 13900 }, { "epoch": 0.56, "learning_rate": 2.4441541095160664e-06, "logits/chosen": -3.034444570541382, "logits/rejected": -3.0620975494384766, "logps/chosen": -3.931464433670044, "logps/rejected": -287.36383056640625, "loss": 0.0912, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27731087803840637, "rewards/margins": 2.797402858734131, "rewards/rejected": -2.520092010498047, "step": 13910 }, { "epoch": 0.56, "learning_rate": 2.440664377614946e-06, "logits/chosen": -3.0347721576690674, "logits/rejected": -3.0606906414031982, "logps/chosen": -3.5831432342529297, "logps/rejected": -288.25469970703125, "loss": 0.0885, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2797204852104187, "rewards/margins": 2.80995512008667, "rewards/rejected": -2.5302343368530273, "step": 13920 }, { "epoch": 0.56, "learning_rate": 2.4371747613916566e-06, "logits/chosen": -3.0172019004821777, "logits/rejected": -3.0473368167877197, "logps/chosen": -0.19100140035152435, "logps/rejected": -290.4797668457031, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.31266048550605774, "rewards/margins": 2.866145372390747, "rewards/rejected": -2.5534846782684326, "step": 13930 }, { "epoch": 0.56, "learning_rate": 2.433685267649385e-06, "logits/chosen": -3.0187618732452393, "logits/rejected": -3.047851085662842, "logps/chosen": -0.18518464267253876, "logps/rejected": -290.9817199707031, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150179386138916, "rewards/margins": 2.8700523376464844, "rewards/rejected": -2.5550343990325928, "step": 13940 }, { "epoch": 0.56, "learning_rate": 2.4301959031910785e-06, "logits/chosen": -2.9929776191711426, "logits/rejected": -3.024784564971924, "logps/chosen": -0.18955595791339874, "logps/rejected": -291.99078369140625, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150356411933899, "rewards/margins": 2.8824949264526367, "rewards/rejected": -2.5674593448638916, "step": 13950 }, { "epoch": 0.56, "learning_rate": 2.4267066748194297e-06, "logits/chosen": -3.037381887435913, "logits/rejected": -3.0637214183807373, "logps/chosen": -3.1636576652526855, "logps/rejected": -286.177490234375, "loss": 0.0853, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2878214716911316, "rewards/margins": 2.794520854949951, "rewards/rejected": -2.506699323654175, "step": 13960 }, { "epoch": 0.56, "learning_rate": 2.4232175893368685e-06, "logits/chosen": -3.007483959197998, "logits/rejected": -3.0368688106536865, "logps/chosen": -3.88132905960083, "logps/rejected": -285.4629821777344, "loss": 0.0935, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27743110060691833, "rewards/margins": 2.7758374214172363, "rewards/rejected": -2.498405933380127, "step": 13970 }, { "epoch": 0.56, "learning_rate": 2.4197286535455464e-06, "logits/chosen": -3.0035345554351807, "logits/rejected": -3.0339560508728027, "logps/chosen": -0.5054758787155151, "logps/rejected": -291.87713623046875, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3112233281135559, "rewards/margins": 2.8734397888183594, "rewards/rejected": -2.5622167587280273, "step": 13980 }, { "epoch": 0.56, "learning_rate": 2.4162398742473216e-06, "logits/chosen": -3.0002923011779785, "logits/rejected": -3.028916835784912, "logps/chosen": -7.447993278503418, "logps/rejected": -285.58978271484375, "loss": 0.1267, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24056899547576904, "rewards/margins": 2.7434403896331787, "rewards/rejected": -2.50287127494812, "step": 13990 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -3.001197576522827, "logits/rejected": -3.0314269065856934, "logps/chosen": -3.167604446411133, "logps/rejected": -286.00860595703125, "loss": 0.0862, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2861153483390808, "rewards/margins": 2.7911651134490967, "rewards/rejected": -2.50504994392395, "step": 14000 }, { "epoch": 0.56, "eval_logits/chosen": -3.0701324939727783, "eval_logits/rejected": -3.095186471939087, "eval_logps/chosen": -0.1697140783071518, "eval_logps/rejected": -283.20672607421875, "eval_loss": 0.061321038752794266, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149023652076721, "eval_rewards/margins": 2.786813974380493, "eval_rewards/rejected": -2.4719114303588867, "eval_runtime": 2.5393, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 14000 }, { "epoch": 0.56, "learning_rate": 2.409262812336061e-06, "logits/chosen": -3.019801616668701, "logits/rejected": -3.049100637435913, "logps/chosen": -0.26090484857559204, "logps/rejected": -290.3235168457031, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31682881712913513, "rewards/margins": 2.86055326461792, "rewards/rejected": -2.543724536895752, "step": 14010 }, { "epoch": 0.56, "learning_rate": 2.4057745433251637e-06, "logits/chosen": -3.0254814624786377, "logits/rejected": -3.052960157394409, "logps/chosen": -3.421407699584961, "logps/rejected": -285.63714599609375, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28503233194351196, "rewards/margins": 2.7845394611358643, "rewards/rejected": -2.499507427215576, "step": 14020 }, { "epoch": 0.56, "learning_rate": 2.4022864580116163e-06, "logits/chosen": -2.995932102203369, "logits/rejected": -3.0257599353790283, "logps/chosen": -5.256276607513428, "logps/rejected": -286.3226013183594, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2640056014060974, "rewards/margins": 2.7737433910369873, "rewards/rejected": -2.5097382068634033, "step": 14030 }, { "epoch": 0.56, "learning_rate": 2.398798563195619e-06, "logits/chosen": -3.015389919281006, "logits/rejected": -3.0389957427978516, "logps/chosen": -12.180743217468262, "logps/rejected": -280.499267578125, "loss": 0.1587, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.19535455107688904, "rewards/margins": 2.648885488510132, "rewards/rejected": -2.453530788421631, "step": 14040 }, { "epoch": 0.56, "learning_rate": 2.3953108656770018e-06, "logits/chosen": -3.0181145668029785, "logits/rejected": -3.046302318572998, "logps/chosen": -2.241792917251587, "logps/rejected": -289.81378173828125, "loss": 0.0718, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2939417064189911, "rewards/margins": 2.837796211242676, "rewards/rejected": -2.5438544750213623, "step": 14050 }, { "epoch": 0.56, "learning_rate": 2.391823372255208e-06, "logits/chosen": -3.0078275203704834, "logits/rejected": -3.0384159088134766, "logps/chosen": -0.2525549530982971, "logps/rejected": -292.0185546875, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138180375099182, "rewards/margins": 2.8768861293792725, "rewards/rejected": -2.563068151473999, "step": 14060 }, { "epoch": 0.56, "learning_rate": 2.3883360897292852e-06, "logits/chosen": -3.0162136554718018, "logits/rejected": -3.0456461906433105, "logps/chosen": -0.20526733994483948, "logps/rejected": -290.9259338378906, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3167805075645447, "rewards/margins": 2.868041753768921, "rewards/rejected": -2.5512614250183105, "step": 14070 }, { "epoch": 0.56, "learning_rate": 2.3848490248978693e-06, "logits/chosen": -3.028829574584961, "logits/rejected": -3.055732488632202, "logps/chosen": -0.17330043017864227, "logps/rejected": -289.593994140625, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.3141116499900818, "rewards/margins": 2.8594865798950195, "rewards/rejected": -2.545374870300293, "step": 14080 }, { "epoch": 0.56, "learning_rate": 2.381362184559173e-06, "logits/chosen": -2.982353687286377, "logits/rejected": -3.013545274734497, "logps/chosen": -2.890561819076538, "logps/rejected": -287.28778076171875, "loss": 0.0832, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2889304757118225, "rewards/margins": 2.804783821105957, "rewards/rejected": -2.5158531665802, "step": 14090 }, { "epoch": 0.56, "learning_rate": 2.377875575510967e-06, "logits/chosen": -3.0115323066711426, "logits/rejected": -3.0408520698547363, "logps/chosen": -3.7966792583465576, "logps/rejected": -289.4259338378906, "loss": 0.0904, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2798762917518616, "rewards/margins": 2.8194780349731445, "rewards/rejected": -2.5396018028259277, "step": 14100 }, { "epoch": 0.56, "eval_logits/chosen": -3.0711851119995117, "eval_logits/rejected": -3.096353769302368, "eval_logps/chosen": -0.17844030261039734, "eval_logps/rejected": -283.4654235839844, "eval_loss": 0.06113709136843681, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31481510400772095, "eval_rewards/margins": 2.789314031600952, "eval_rewards/rejected": -2.474498748779297, "eval_runtime": 2.5413, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 14100 }, { "epoch": 0.56, "learning_rate": 2.3743892045505764e-06, "logits/chosen": -3.0339014530181885, "logits/rejected": -3.0652871131896973, "logps/chosen": -0.3860131800174713, "logps/rejected": -290.3527526855469, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.31670695543289185, "rewards/margins": 2.8609135150909424, "rewards/rejected": -2.544206142425537, "step": 14110 }, { "epoch": 0.56, "learning_rate": 2.370903078474859e-06, "logits/chosen": -3.039923906326294, "logits/rejected": -3.0694680213928223, "logps/chosen": -0.17025111615657806, "logps/rejected": -291.9432678222656, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.31476667523384094, "rewards/margins": 2.880906105041504, "rewards/rejected": -2.5661396980285645, "step": 14120 }, { "epoch": 0.57, "learning_rate": 2.3674172040801964e-06, "logits/chosen": -3.01993465423584, "logits/rejected": -3.0457358360290527, "logps/chosen": -7.330478668212891, "logps/rejected": -284.17999267578125, "loss": 0.1265, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24592673778533936, "rewards/margins": 2.730901002883911, "rewards/rejected": -2.4849743843078613, "step": 14130 }, { "epoch": 0.57, "learning_rate": 2.3639315881624776e-06, "logits/chosen": -3.0516357421875, "logits/rejected": -3.077436685562134, "logps/chosen": -0.21909339725971222, "logps/rejected": -292.09918212890625, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.3162366449832916, "rewards/margins": 2.879201650619507, "rewards/rejected": -2.562964916229248, "step": 14140 }, { "epoch": 0.57, "learning_rate": 2.3604462375170905e-06, "logits/chosen": -3.0276670455932617, "logits/rejected": -3.0569958686828613, "logps/chosen": -7.974130153656006, "logps/rejected": -280.06427001953125, "loss": 0.1327, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23897287249565125, "rewards/margins": 2.68070912361145, "rewards/rejected": -2.4417364597320557, "step": 14150 }, { "epoch": 0.57, "learning_rate": 2.356961158938905e-06, "logits/chosen": -3.037903308868408, "logits/rejected": -3.065692663192749, "logps/chosen": -3.817610502243042, "logps/rejected": -288.31097412109375, "loss": 0.0911, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2810043692588806, "rewards/margins": 2.8074588775634766, "rewards/rejected": -2.526454448699951, "step": 14160 }, { "epoch": 0.57, "learning_rate": 2.353476359222259e-06, "logits/chosen": -3.020045042037964, "logits/rejected": -3.048449993133545, "logps/chosen": -0.19725710153579712, "logps/rejected": -290.5859375, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.314282089471817, "rewards/margins": 2.8660850524902344, "rewards/rejected": -2.55180287361145, "step": 14170 }, { "epoch": 0.57, "learning_rate": 2.349991845160949e-06, "logits/chosen": -3.0204710960388184, "logits/rejected": -3.050901174545288, "logps/chosen": -0.20039930939674377, "logps/rejected": -292.8547058105469, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.314754843711853, "rewards/margins": 2.889617443084717, "rewards/rejected": -2.5748627185821533, "step": 14180 }, { "epoch": 0.57, "learning_rate": 2.3465076235482117e-06, "logits/chosen": -3.0134642124176025, "logits/rejected": -3.0419657230377197, "logps/chosen": -0.1984046846628189, "logps/rejected": -290.3363342285156, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3123350739479065, "rewards/margins": 2.8614799976348877, "rewards/rejected": -2.549144983291626, "step": 14190 }, { "epoch": 0.57, "learning_rate": 2.3430237011767166e-06, "logits/chosen": -3.0248324871063232, "logits/rejected": -3.051839828491211, "logps/chosen": -3.845383882522583, "logps/rejected": -285.4247131347656, "loss": 0.0932, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2776484489440918, "rewards/margins": 2.778630256652832, "rewards/rejected": -2.500981569290161, "step": 14200 }, { "epoch": 0.57, "eval_logits/chosen": -3.0697684288024902, "eval_logits/rejected": -3.0954346656799316, "eval_logps/chosen": -0.18482139706611633, "eval_logps/rejected": -283.85931396484375, "eval_loss": 0.060768116265535355, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147513270378113, "eval_rewards/margins": 2.7931888103485107, "eval_rewards/rejected": -2.478437662124634, "eval_runtime": 2.538, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 14200 }, { "epoch": 0.57, "learning_rate": 2.3395400848385486e-06, "logits/chosen": -3.0273563861846924, "logits/rejected": -3.052476644515991, "logps/chosen": -7.143075466156006, "logps/rejected": -282.12469482421875, "loss": 0.1256, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24661557376384735, "rewards/margins": 2.712862014770508, "rewards/rejected": -2.4662468433380127, "step": 14210 }, { "epoch": 0.57, "learning_rate": 2.336056781325197e-06, "logits/chosen": -3.0383477210998535, "logits/rejected": -3.06378436088562, "logps/chosen": -0.18566061556339264, "logps/rejected": -289.69146728515625, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31641408801078796, "rewards/margins": 2.8564863204956055, "rewards/rejected": -2.540072202682495, "step": 14220 }, { "epoch": 0.57, "learning_rate": 2.3325737974275382e-06, "logits/chosen": -3.0377743244171143, "logits/rejected": -3.0679643154144287, "logps/chosen": -3.5461552143096924, "logps/rejected": -285.091064453125, "loss": 0.0904, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28205448389053345, "rewards/margins": 2.7772128582000732, "rewards/rejected": -2.4951581954956055, "step": 14230 }, { "epoch": 0.57, "learning_rate": 2.3290911399358287e-06, "logits/chosen": -3.0182862281799316, "logits/rejected": -3.0458009243011475, "logps/chosen": -6.11733865737915, "logps/rejected": -279.00653076171875, "loss": 0.1185, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2548183798789978, "rewards/margins": 2.6947824954986572, "rewards/rejected": -2.4399640560150146, "step": 14240 }, { "epoch": 0.57, "learning_rate": 2.325608815639687e-06, "logits/chosen": -3.0049102306365967, "logits/rejected": -3.0333895683288574, "logps/chosen": -3.081688404083252, "logps/rejected": -290.7719421386719, "loss": 0.0677, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2858283817768097, "rewards/margins": 2.838259696960449, "rewards/rejected": -2.552431106567383, "step": 14250 }, { "epoch": 0.57, "learning_rate": 2.3221268313280836e-06, "logits/chosen": -3.029466152191162, "logits/rejected": -3.0577163696289062, "logps/chosen": -0.4066082835197449, "logps/rejected": -288.59149169921875, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.3125549852848053, "rewards/margins": 2.845214366912842, "rewards/rejected": -2.5326590538024902, "step": 14260 }, { "epoch": 0.57, "learning_rate": 2.3186451937893234e-06, "logits/chosen": -3.010143518447876, "logits/rejected": -3.038078784942627, "logps/chosen": -6.442028999328613, "logps/rejected": -284.5791931152344, "loss": 0.1182, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.251809298992157, "rewards/margins": 2.742827892303467, "rewards/rejected": -2.491018772125244, "step": 14270 }, { "epoch": 0.57, "learning_rate": 2.315163909811038e-06, "logits/chosen": -3.0114665031433105, "logits/rejected": -3.0401451587677, "logps/chosen": -0.21424174308776855, "logps/rejected": -289.7803039550781, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.31460344791412354, "rewards/margins": 2.854449510574341, "rewards/rejected": -2.539846420288086, "step": 14280 }, { "epoch": 0.57, "learning_rate": 2.3116829861801687e-06, "logits/chosen": -3.0200679302215576, "logits/rejected": -3.0474820137023926, "logps/chosen": -2.9562089443206787, "logps/rejected": -288.11529541015625, "loss": 0.0814, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29031795263290405, "rewards/margins": 2.8160500526428223, "rewards/rejected": -2.5257325172424316, "step": 14290 }, { "epoch": 0.57, "learning_rate": 2.3082024296829538e-06, "logits/chosen": -3.0324361324310303, "logits/rejected": -3.061614513397217, "logps/chosen": -2.435300350189209, "logps/rejected": -289.30242919921875, "loss": 0.0765, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2934972643852234, "rewards/margins": 2.827423572540283, "rewards/rejected": -2.533926010131836, "step": 14300 }, { "epoch": 0.57, "eval_logits/chosen": -3.071510076522827, "eval_logits/rejected": -3.097076177597046, "eval_logps/chosen": -0.19696560502052307, "eval_logps/rejected": -283.60894775390625, "eval_loss": 0.06097652390599251, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3146298825740814, "eval_rewards/margins": 2.7905635833740234, "eval_rewards/rejected": -2.475933790206909, "eval_runtime": 2.5386, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 14300 }, { "epoch": 0.57, "learning_rate": 2.304722247104917e-06, "logits/chosen": -3.037518262863159, "logits/rejected": -3.0619804859161377, "logps/chosen": -6.441178798675537, "logps/rejected": -282.45343017578125, "loss": 0.1178, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25117021799087524, "rewards/margins": 2.721862316131592, "rewards/rejected": -2.470691680908203, "step": 14310 }, { "epoch": 0.57, "learning_rate": 2.301242445230851e-06, "logits/chosen": -3.0070862770080566, "logits/rejected": -3.0371158123016357, "logps/chosen": -2.491731643676758, "logps/rejected": -290.84765625, "loss": 0.0633, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29018688201904297, "rewards/margins": 2.8466708660125732, "rewards/rejected": -2.556483745574951, "step": 14320 }, { "epoch": 0.57, "learning_rate": 2.2977630308448084e-06, "logits/chosen": -2.986860752105713, "logits/rejected": -3.0180439949035645, "logps/chosen": -6.488661766052246, "logps/rejected": -283.1789245605469, "loss": 0.1196, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2515699863433838, "rewards/margins": 2.727694034576416, "rewards/rejected": -2.4761242866516113, "step": 14330 }, { "epoch": 0.57, "learning_rate": 2.294284010730086e-06, "logits/chosen": -3.0313189029693604, "logits/rejected": -3.05894136428833, "logps/chosen": -3.0891172885894775, "logps/rejected": -288.6536560058594, "loss": 0.0837, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28519853949546814, "rewards/margins": 2.8187966346740723, "rewards/rejected": -2.5335984230041504, "step": 14340 }, { "epoch": 0.57, "learning_rate": 2.290805391669212e-06, "logits/chosen": -3.011932134628296, "logits/rejected": -3.03855562210083, "logps/chosen": -3.26593279838562, "logps/rejected": -288.20233154296875, "loss": 0.0863, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2836834788322449, "rewards/margins": 2.8082175254821777, "rewards/rejected": -2.524534225463867, "step": 14350 }, { "epoch": 0.57, "learning_rate": 2.28732718044393e-06, "logits/chosen": -2.9941964149475098, "logits/rejected": -3.0234837532043457, "logps/chosen": -3.0324552059173584, "logps/rejected": -287.202392578125, "loss": 0.0835, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28533750772476196, "rewards/margins": 2.8068554401397705, "rewards/rejected": -2.521517515182495, "step": 14360 }, { "epoch": 0.57, "learning_rate": 2.2838493838351933e-06, "logits/chosen": -3.0041332244873047, "logits/rejected": -3.033130168914795, "logps/chosen": -0.1627131998538971, "logps/rejected": -291.34906005859375, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.31494826078414917, "rewards/margins": 2.876325845718384, "rewards/rejected": -2.56137752532959, "step": 14370 }, { "epoch": 0.58, "learning_rate": 2.280372008623142e-06, "logits/chosen": -3.017918109893799, "logits/rejected": -3.0464606285095215, "logps/chosen": -3.7423393726348877, "logps/rejected": -289.13104248046875, "loss": 0.0899, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2796512246131897, "rewards/margins": 2.8169336318969727, "rewards/rejected": -2.5372824668884277, "step": 14380 }, { "epoch": 0.58, "learning_rate": 2.276895061587099e-06, "logits/chosen": -3.020650863647461, "logits/rejected": -3.048799991607666, "logps/chosen": -0.17330783605575562, "logps/rejected": -290.26983642578125, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.31397709250450134, "rewards/margins": 2.869605302810669, "rewards/rejected": -2.5556282997131348, "step": 14390 }, { "epoch": 0.58, "learning_rate": 2.2734185495055503e-06, "logits/chosen": -3.0123085975646973, "logits/rejected": -3.041384696960449, "logps/chosen": -2.400379180908203, "logps/rejected": -289.42803955078125, "loss": 0.076, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29410094022750854, "rewards/margins": 2.8318002223968506, "rewards/rejected": -2.5376992225646973, "step": 14400 }, { "epoch": 0.58, "eval_logits/chosen": -3.0716021060943604, "eval_logits/rejected": -3.097409248352051, "eval_logps/chosen": -0.1812267005443573, "eval_logps/rejected": -284.23077392578125, "eval_loss": 0.06040232256054878, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31478726863861084, "eval_rewards/margins": 2.7969393730163574, "eval_rewards/rejected": -2.482151985168457, "eval_runtime": 2.5432, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 14400 }, { "epoch": 0.58, "learning_rate": 2.2699424791561324e-06, "logits/chosen": -3.0310988426208496, "logits/rejected": -3.05678653717041, "logps/chosen": -11.956021308898926, "logps/rejected": -278.1504211425781, "loss": 0.1704, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19834569096565247, "rewards/margins": 2.6294097900390625, "rewards/rejected": -2.4310638904571533, "step": 14410 }, { "epoch": 0.58, "learning_rate": 2.266466857315624e-06, "logits/chosen": -3.018555164337158, "logits/rejected": -3.0466771125793457, "logps/chosen": -3.9663567543029785, "logps/rejected": -285.5011291503906, "loss": 0.0935, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2775769829750061, "rewards/margins": 2.777982234954834, "rewards/rejected": -2.5004050731658936, "step": 14420 }, { "epoch": 0.58, "learning_rate": 2.2629916907599265e-06, "logits/chosen": -3.0001189708709717, "logits/rejected": -3.029743194580078, "logps/chosen": -0.4127276539802551, "logps/rejected": -289.225830078125, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": 0.31194180250167847, "rewards/margins": 2.8496813774108887, "rewards/rejected": -2.5377397537231445, "step": 14430 }, { "epoch": 0.58, "learning_rate": 2.259516986264057e-06, "logits/chosen": -3.039996385574341, "logits/rejected": -3.0653228759765625, "logps/chosen": -3.7251648902893066, "logps/rejected": -286.24652099609375, "loss": 0.091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28105515241622925, "rewards/margins": 2.790008783340454, "rewards/rejected": -2.50895357131958, "step": 14440 }, { "epoch": 0.58, "learning_rate": 2.256042750602127e-06, "logits/chosen": -3.0110554695129395, "logits/rejected": -3.0395941734313965, "logps/chosen": -0.19756671786308289, "logps/rejected": -290.9737854003906, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3133869171142578, "rewards/margins": 2.8672518730163574, "rewards/rejected": -2.5538644790649414, "step": 14450 }, { "epoch": 0.58, "learning_rate": 2.2525689905473377e-06, "logits/chosen": -3.0138068199157715, "logits/rejected": -3.0428216457366943, "logps/chosen": -0.21459996700286865, "logps/rejected": -292.1846008300781, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153606057167053, "rewards/margins": 2.885138750076294, "rewards/rejected": -2.5697779655456543, "step": 14460 }, { "epoch": 0.58, "learning_rate": 2.2490957128719627e-06, "logits/chosen": -3.008716106414795, "logits/rejected": -3.0374646186828613, "logps/chosen": -3.8086581230163574, "logps/rejected": -286.2989196777344, "loss": 0.0917, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2758113741874695, "rewards/margins": 2.7870826721191406, "rewards/rejected": -2.5112714767456055, "step": 14470 }, { "epoch": 0.58, "learning_rate": 2.2456229243473346e-06, "logits/chosen": -3.017639398574829, "logits/rejected": -3.0444464683532715, "logps/chosen": -0.20520198345184326, "logps/rejected": -289.64776611328125, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.3145299553871155, "rewards/margins": 2.8543124198913574, "rewards/rejected": -2.539782762527466, "step": 14480 }, { "epoch": 0.58, "learning_rate": 2.242150631743832e-06, "logits/chosen": -3.022055149078369, "logits/rejected": -3.050915241241455, "logps/chosen": -2.6723685264587402, "logps/rejected": -288.5283508300781, "loss": 0.0765, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28976136445999146, "rewards/margins": 2.818626880645752, "rewards/rejected": -2.5288655757904053, "step": 14490 }, { "epoch": 0.58, "learning_rate": 2.238678841830867e-06, "logits/chosen": -3.018235445022583, "logits/rejected": -3.0452399253845215, "logps/chosen": -0.18793928623199463, "logps/rejected": -291.2825927734375, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.3160495460033417, "rewards/margins": 2.8751673698425293, "rewards/rejected": -2.559117555618286, "step": 14500 }, { "epoch": 0.58, "eval_logits/chosen": -3.072190046310425, "eval_logits/rejected": -3.096177577972412, "eval_logps/chosen": -0.2187124788761139, "eval_logps/rejected": -283.8933410644531, "eval_loss": 0.06075149029493332, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3144124150276184, "eval_rewards/margins": 2.7931902408599854, "eval_rewards/rejected": -2.4787776470184326, "eval_runtime": 2.5391, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 14500 }, { "epoch": 0.58, "learning_rate": 2.235207561376871e-06, "logits/chosen": -3.006836175918579, "logits/rejected": -3.0343596935272217, "logps/chosen": -1.7162567377090454, "logps/rejected": -288.9228820800781, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": 0.2989612817764282, "rewards/margins": 2.835975408554077, "rewards/rejected": -2.5370144844055176, "step": 14510 }, { "epoch": 0.58, "learning_rate": 2.2317367971492833e-06, "logits/chosen": -3.010366201400757, "logits/rejected": -3.0391478538513184, "logps/chosen": -0.3367602229118347, "logps/rejected": -291.8067932128906, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3147231936454773, "rewards/margins": 2.877040386199951, "rewards/rejected": -2.562317371368408, "step": 14520 }, { "epoch": 0.58, "learning_rate": 2.228266555914538e-06, "logits/chosen": -3.0280814170837402, "logits/rejected": -3.053567886352539, "logps/chosen": -6.163389205932617, "logps/rejected": -281.1399841308594, "loss": 0.1137, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25533995032310486, "rewards/margins": 2.713179349899292, "rewards/rejected": -2.4578394889831543, "step": 14530 }, { "epoch": 0.58, "learning_rate": 2.224796844438045e-06, "logits/chosen": -2.9909884929656982, "logits/rejected": -3.0208613872528076, "logps/chosen": -4.692597389221191, "logps/rejected": -282.2137145996094, "loss": 0.1008, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26839321851730347, "rewards/margins": 2.7398860454559326, "rewards/rejected": -2.4714930057525635, "step": 14540 }, { "epoch": 0.58, "learning_rate": 2.2213276694841866e-06, "logits/chosen": -3.0147604942321777, "logits/rejected": -3.0478923320770264, "logps/chosen": -3.114412307739258, "logps/rejected": -286.44586181640625, "loss": 0.0856, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28486138582229614, "rewards/margins": 2.7949366569519043, "rewards/rejected": -2.510075092315674, "step": 14550 }, { "epoch": 0.58, "learning_rate": 2.2178590378162957e-06, "logits/chosen": -3.0067801475524902, "logits/rejected": -3.038072347640991, "logps/chosen": -2.389739513397217, "logps/rejected": -282.06439208984375, "loss": 0.0836, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2936088442802429, "rewards/margins": 2.7626547813415527, "rewards/rejected": -2.469046115875244, "step": 14560 }, { "epoch": 0.58, "learning_rate": 2.2143909561966494e-06, "logits/chosen": -3.0322375297546387, "logits/rejected": -3.0606160163879395, "logps/chosen": -3.143942356109619, "logps/rejected": -289.8232727050781, "loss": 0.084, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2874149680137634, "rewards/margins": 2.8250458240509033, "rewards/rejected": -2.537630558013916, "step": 14570 }, { "epoch": 0.58, "learning_rate": 2.2109234313864468e-06, "logits/chosen": -2.998522996902466, "logits/rejected": -3.031932830810547, "logps/chosen": -0.1587749570608139, "logps/rejected": -292.8255310058594, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138037323951721, "rewards/margins": 2.89030385017395, "rewards/rejected": -2.5764999389648438, "step": 14580 }, { "epoch": 0.58, "learning_rate": 2.207456470145807e-06, "logits/chosen": -3.0024566650390625, "logits/rejected": -3.0331344604492188, "logps/chosen": -6.720869541168213, "logps/rejected": -282.2977600097656, "loss": 0.1222, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24762502312660217, "rewards/margins": 2.718202829360962, "rewards/rejected": -2.4705777168273926, "step": 14590 }, { "epoch": 0.58, "learning_rate": 2.2039900792337477e-06, "logits/chosen": -2.99699068069458, "logits/rejected": -3.0265960693359375, "logps/chosen": -3.907764434814453, "logps/rejected": -281.47412109375, "loss": 0.0975, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27601951360702515, "rewards/margins": 2.7392210960388184, "rewards/rejected": -2.4632017612457275, "step": 14600 }, { "epoch": 0.58, "eval_logits/chosen": -3.0709588527679443, "eval_logits/rejected": -3.097324848175049, "eval_logps/chosen": -0.1702142059803009, "eval_logps/rejected": -284.26971435546875, "eval_loss": 0.06037729233503342, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.314897358417511, "eval_rewards/margins": 2.797438859939575, "eval_rewards/rejected": -2.482541561126709, "eval_runtime": 2.5385, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 14600 }, { "epoch": 0.58, "learning_rate": 2.2005242654081765e-06, "logits/chosen": -3.003627300262451, "logits/rejected": -3.036254405975342, "logps/chosen": -0.3475344777107239, "logps/rejected": -290.679443359375, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3127504885196686, "rewards/margins": 2.865990161895752, "rewards/rejected": -2.5532400608062744, "step": 14610 }, { "epoch": 0.58, "learning_rate": 2.1970590354258745e-06, "logits/chosen": -3.008425235748291, "logits/rejected": -3.0397403240203857, "logps/chosen": -3.029920816421509, "logps/rejected": -288.6434631347656, "loss": 0.0826, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2860594391822815, "rewards/margins": 2.821418285369873, "rewards/rejected": -2.5353591442108154, "step": 14620 }, { "epoch": 0.59, "learning_rate": 2.1935943960424834e-06, "logits/chosen": -3.0061373710632324, "logits/rejected": -3.037902593612671, "logps/chosen": -0.281788170337677, "logps/rejected": -289.4366760253906, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": 0.3131239712238312, "rewards/margins": 2.8545188903808594, "rewards/rejected": -2.5413947105407715, "step": 14630 }, { "epoch": 0.59, "learning_rate": 2.1901303540124956e-06, "logits/chosen": -3.012596607208252, "logits/rejected": -3.0409953594207764, "logps/chosen": -0.1912412941455841, "logps/rejected": -291.46270751953125, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31575217843055725, "rewards/margins": 2.8751418590545654, "rewards/rejected": -2.559389591217041, "step": 14640 }, { "epoch": 0.59, "learning_rate": 2.186666916089239e-06, "logits/chosen": -3.0249266624450684, "logits/rejected": -3.052561044692993, "logps/chosen": -5.933615684509277, "logps/rejected": -285.9869384765625, "loss": 0.1111, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25785011053085327, "rewards/margins": 2.7629873752593994, "rewards/rejected": -2.5051369667053223, "step": 14650 }, { "epoch": 0.59, "learning_rate": 2.183204089024864e-06, "logits/chosen": -3.0375559329986572, "logits/rejected": -3.065814971923828, "logps/chosen": -0.3004741072654724, "logps/rejected": -289.59808349609375, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": 0.3162166476249695, "rewards/margins": 2.8568930625915527, "rewards/rejected": -2.5406765937805176, "step": 14660 }, { "epoch": 0.59, "learning_rate": 2.179741879570327e-06, "logits/chosen": -3.0386407375335693, "logits/rejected": -3.067000150680542, "logps/chosen": -0.3141588270664215, "logps/rejected": -287.44757080078125, "loss": 0.0631, "rewards/accuracies": 1.0, "rewards/chosen": 0.31392064690589905, "rewards/margins": 2.832815170288086, "rewards/rejected": -2.518894672393799, "step": 14670 }, { "epoch": 0.59, "learning_rate": 2.176280294475383e-06, "logits/chosen": -3.0222277641296387, "logits/rejected": -3.0536398887634277, "logps/chosen": -2.7025203704833984, "logps/rejected": -288.5404357910156, "loss": 0.08, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2889368236064911, "rewards/margins": 2.81805419921875, "rewards/rejected": -2.5291175842285156, "step": 14680 }, { "epoch": 0.59, "learning_rate": 2.17281934048857e-06, "logits/chosen": -3.0102009773254395, "logits/rejected": -3.0419278144836426, "logps/chosen": -1.9398090839385986, "logps/rejected": -287.7654113769531, "loss": 0.0715, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29771238565444946, "rewards/margins": 2.8191041946411133, "rewards/rejected": -2.5213918685913086, "step": 14690 }, { "epoch": 0.59, "learning_rate": 2.1693590243571937e-06, "logits/chosen": -3.012303590774536, "logits/rejected": -3.043775796890259, "logps/chosen": -0.17204679548740387, "logps/rejected": -292.32794189453125, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.31531426310539246, "rewards/margins": 2.8876194953918457, "rewards/rejected": -2.572305202484131, "step": 14700 }, { "epoch": 0.59, "eval_logits/chosen": -3.070826292037964, "eval_logits/rejected": -3.096404790878296, "eval_logps/chosen": -0.18395750224590302, "eval_logps/rejected": -283.9458312988281, "eval_loss": 0.060692690312862396, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147599399089813, "eval_rewards/margins": 2.794062614440918, "eval_rewards/rejected": -2.4793026447296143, "eval_runtime": 2.5359, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 14700 }, { "epoch": 0.59, "learning_rate": 2.1658993528273196e-06, "logits/chosen": -3.033479690551758, "logits/rejected": -3.0630500316619873, "logps/chosen": -0.20733892917633057, "logps/rejected": -290.8169860839844, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3156730830669403, "rewards/margins": 2.870093584060669, "rewards/rejected": -2.554420232772827, "step": 14710 }, { "epoch": 0.59, "learning_rate": 2.1624403326437523e-06, "logits/chosen": -3.027841567993164, "logits/rejected": -3.052030563354492, "logps/chosen": -9.655538558959961, "logps/rejected": -282.56927490234375, "loss": 0.1482, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22135181725025177, "rewards/margins": 2.6933035850524902, "rewards/rejected": -2.471952199935913, "step": 14720 }, { "epoch": 0.59, "learning_rate": 2.1589819705500293e-06, "logits/chosen": -3.0481462478637695, "logits/rejected": -3.0752222537994385, "logps/chosen": -2.085939884185791, "logps/rejected": -287.34490966796875, "loss": 0.0742, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2975473999977112, "rewards/margins": 2.8141415119171143, "rewards/rejected": -2.5165939331054688, "step": 14730 }, { "epoch": 0.59, "learning_rate": 2.155524273288405e-06, "logits/chosen": -3.0208239555358887, "logits/rejected": -3.049532413482666, "logps/chosen": -3.828481674194336, "logps/rejected": -289.5250549316406, "loss": 0.0903, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2802647650241852, "rewards/margins": 2.819514274597168, "rewards/rejected": -2.539249897003174, "step": 14740 }, { "epoch": 0.59, "learning_rate": 2.1520672475998374e-06, "logits/chosen": -3.023890972137451, "logits/rejected": -3.0529978275299072, "logps/chosen": -3.7762863636016846, "logps/rejected": -286.2662658691406, "loss": 0.0917, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27928626537323, "rewards/margins": 2.7870893478393555, "rewards/rejected": -2.507803440093994, "step": 14750 }, { "epoch": 0.59, "learning_rate": 2.148610900223973e-06, "logits/chosen": -3.026700496673584, "logits/rejected": -3.0572409629821777, "logps/chosen": -0.1828574538230896, "logps/rejected": -293.1933288574219, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": 0.3167140483856201, "rewards/margins": 2.892150402069092, "rewards/rejected": -2.5754361152648926, "step": 14760 }, { "epoch": 0.59, "learning_rate": 2.145155237899139e-06, "logits/chosen": -3.0296409130096436, "logits/rejected": -3.055591344833374, "logps/chosen": -0.15701189637184143, "logps/rejected": -290.7105407714844, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3156682550907135, "rewards/margins": 2.8701066970825195, "rewards/rejected": -2.554438829421997, "step": 14770 }, { "epoch": 0.59, "learning_rate": 2.141700267362327e-06, "logits/chosen": -3.017487049102783, "logits/rejected": -3.048340082168579, "logps/chosen": -3.888868808746338, "logps/rejected": -288.1048889160156, "loss": 0.0911, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2794593274593353, "rewards/margins": 2.804715394973755, "rewards/rejected": -2.5252559185028076, "step": 14780 }, { "epoch": 0.59, "learning_rate": 2.1382459953491773e-06, "logits/chosen": -2.9993948936462402, "logits/rejected": -3.030621290206909, "logps/chosen": -1.9221851825714111, "logps/rejected": -287.48858642578125, "loss": 0.0744, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2969356179237366, "rewards/margins": 2.818211317062378, "rewards/rejected": -2.521275520324707, "step": 14790 }, { "epoch": 0.59, "learning_rate": 2.134792428593971e-06, "logits/chosen": -3.0303256511688232, "logits/rejected": -3.060375452041626, "logps/chosen": -0.16741900146007538, "logps/rejected": -292.64337158203125, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": 0.3183608651161194, "rewards/margins": 2.885284423828125, "rewards/rejected": -2.566923141479492, "step": 14800 }, { "epoch": 0.59, "eval_logits/chosen": -3.0717155933380127, "eval_logits/rejected": -3.097196102142334, "eval_logps/chosen": -0.15153655409812927, "eval_logps/rejected": -284.1385192871094, "eval_loss": 0.060518551617860794, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31508415937423706, "eval_rewards/margins": 2.796313524246216, "eval_rewards/rejected": -2.481229305267334, "eval_runtime": 2.5382, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 14800 }, { "epoch": 0.59, "learning_rate": 2.1313395738296134e-06, "logits/chosen": -3.0128448009490967, "logits/rejected": -3.0407767295837402, "logps/chosen": -3.8257102966308594, "logps/rejected": -289.092529296875, "loss": 0.0908, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28416144847869873, "rewards/margins": 2.814866542816162, "rewards/rejected": -2.530705213546753, "step": 14810 }, { "epoch": 0.59, "learning_rate": 2.1278874377876196e-06, "logits/chosen": -3.0209081172943115, "logits/rejected": -3.051530122756958, "logps/chosen": -3.6877732276916504, "logps/rejected": -286.6667785644531, "loss": 0.0911, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2794848084449768, "rewards/margins": 2.792614698410034, "rewards/rejected": -2.513129711151123, "step": 14820 }, { "epoch": 0.59, "learning_rate": 2.1244360271981073e-06, "logits/chosen": -3.010169506072998, "logits/rejected": -3.0396132469177246, "logps/chosen": -2.802638530731201, "logps/rejected": -288.05084228515625, "loss": 0.0807, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2872433662414551, "rewards/margins": 2.816610813140869, "rewards/rejected": -2.529367685317993, "step": 14830 }, { "epoch": 0.59, "learning_rate": 2.1209853487897785e-06, "logits/chosen": -3.029618740081787, "logits/rejected": -3.0597147941589355, "logps/chosen": -0.3429928123950958, "logps/rejected": -288.0235595703125, "loss": 0.062, "rewards/accuracies": 1.0, "rewards/chosen": 0.3142208456993103, "rewards/margins": 2.8389008045196533, "rewards/rejected": -2.524679660797119, "step": 14840 }, { "epoch": 0.59, "learning_rate": 2.117535409289905e-06, "logits/chosen": -3.0121006965637207, "logits/rejected": -3.0431199073791504, "logps/chosen": -0.20118775963783264, "logps/rejected": -290.58203125, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3117533326148987, "rewards/margins": 2.8686623573303223, "rewards/rejected": -2.5569088459014893, "step": 14850 }, { "epoch": 0.59, "learning_rate": 2.1140862154243223e-06, "logits/chosen": -3.0211331844329834, "logits/rejected": -3.0512232780456543, "logps/chosen": -3.4622726440429688, "logps/rejected": -289.98541259765625, "loss": 0.0864, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2837655246257782, "rewards/margins": 2.832176446914673, "rewards/rejected": -2.548410654067993, "step": 14860 }, { "epoch": 0.59, "learning_rate": 2.11063777391741e-06, "logits/chosen": -3.0188794136047363, "logits/rejected": -3.047022581100464, "logps/chosen": -4.0814924240112305, "logps/rejected": -286.6539001464844, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2757461965084076, "rewards/margins": 2.7859115600585938, "rewards/rejected": -2.5101654529571533, "step": 14870 }, { "epoch": 0.6, "learning_rate": 2.1071900914920817e-06, "logits/chosen": -3.0185062885284424, "logits/rejected": -3.0515594482421875, "logps/chosen": -0.1801159381866455, "logps/rejected": -290.198486328125, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.31524109840393066, "rewards/margins": 2.8644089698791504, "rewards/rejected": -2.5491676330566406, "step": 14880 }, { "epoch": 0.6, "learning_rate": 2.103743174869769e-06, "logits/chosen": -3.0191597938537598, "logits/rejected": -3.0515494346618652, "logps/chosen": -0.21501144766807556, "logps/rejected": -291.8154602050781, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3149668276309967, "rewards/margins": 2.87776517868042, "rewards/rejected": -2.562798023223877, "step": 14890 }, { "epoch": 0.6, "learning_rate": 2.1002970307704134e-06, "logits/chosen": -3.014359951019287, "logits/rejected": -3.045231580734253, "logps/chosen": -3.757671356201172, "logps/rejected": -288.21905517578125, "loss": 0.0902, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27676743268966675, "rewards/margins": 2.8079159259796143, "rewards/rejected": -2.5311484336853027, "step": 14900 }, { "epoch": 0.6, "eval_logits/chosen": -3.070969343185425, "eval_logits/rejected": -3.0979833602905273, "eval_logps/chosen": -0.18286879360675812, "eval_logps/rejected": -284.1324768066406, "eval_loss": 0.060550104826688766, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147708475589752, "eval_rewards/margins": 2.7959396839141846, "eval_rewards/rejected": -2.4811692237854004, "eval_runtime": 2.5429, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 14900 }, { "epoch": 0.6, "learning_rate": 2.096851665912449e-06, "logits/chosen": -3.026594638824463, "logits/rejected": -3.055602788925171, "logps/chosen": -3.8868396282196045, "logps/rejected": -284.93121337890625, "loss": 0.0969, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2780694365501404, "rewards/margins": 2.7743005752563477, "rewards/rejected": -2.4962313175201416, "step": 14910 }, { "epoch": 0.6, "learning_rate": 2.093407087012791e-06, "logits/chosen": -3.0027549266815186, "logits/rejected": -3.033437728881836, "logps/chosen": -0.25257426500320435, "logps/rejected": -292.2650451660156, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.31299251317977905, "rewards/margins": 2.8824758529663086, "rewards/rejected": -2.569483518600464, "step": 14920 }, { "epoch": 0.6, "learning_rate": 2.0899633007868226e-06, "logits/chosen": -3.036912202835083, "logits/rejected": -3.062687635421753, "logps/chosen": -8.617424011230469, "logps/rejected": -280.5596923828125, "loss": 0.1336, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23268239200115204, "rewards/margins": 2.683760166168213, "rewards/rejected": -2.451077938079834, "step": 14930 }, { "epoch": 0.6, "learning_rate": 2.086520313948381e-06, "logits/chosen": -3.007552146911621, "logits/rejected": -3.035059690475464, "logps/chosen": -6.2535223960876465, "logps/rejected": -283.4858093261719, "loss": 0.1161, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25560909509658813, "rewards/margins": 2.734640598297119, "rewards/rejected": -2.4790313243865967, "step": 14940 }, { "epoch": 0.6, "learning_rate": 2.0830781332097446e-06, "logits/chosen": -3.014402389526367, "logits/rejected": -3.044217586517334, "logps/chosen": -2.935580253601074, "logps/rejected": -287.5451354980469, "loss": 0.083, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2871706485748291, "rewards/margins": 2.809180974960327, "rewards/rejected": -2.522010326385498, "step": 14950 }, { "epoch": 0.6, "learning_rate": 2.0796367652816213e-06, "logits/chosen": -3.013498544692993, "logits/rejected": -3.04213285446167, "logps/chosen": -7.901714324951172, "logps/rejected": -278.58319091796875, "loss": 0.1325, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23732805252075195, "rewards/margins": 2.6693010330200195, "rewards/rejected": -2.4319729804992676, "step": 14960 }, { "epoch": 0.6, "learning_rate": 2.076196216873135e-06, "logits/chosen": -3.022874116897583, "logits/rejected": -3.0538814067840576, "logps/chosen": -0.20356485247612, "logps/rejected": -285.6420593261719, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": 0.314289391040802, "rewards/margins": 2.814743995666504, "rewards/rejected": -2.5004546642303467, "step": 14970 }, { "epoch": 0.6, "learning_rate": 2.072756494691809e-06, "logits/chosen": -3.020923376083374, "logits/rejected": -3.0499863624572754, "logps/chosen": -0.3488709330558777, "logps/rejected": -290.2087707519531, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.3136270046234131, "rewards/margins": 2.8609867095947266, "rewards/rejected": -2.5473597049713135, "step": 14980 }, { "epoch": 0.6, "learning_rate": 2.0693176054435586e-06, "logits/chosen": -3.002655506134033, "logits/rejected": -3.0323071479797363, "logps/chosen": -1.9331705570220947, "logps/rejected": -287.0559387207031, "loss": 0.0639, "rewards/accuracies": 1.0, "rewards/chosen": 0.2965352237224579, "rewards/margins": 2.8069019317626953, "rewards/rejected": -2.510366439819336, "step": 14990 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -3.0052714347839355, "logits/rejected": -3.0346262454986572, "logps/chosen": -1.3936196565628052, "logps/rejected": -288.6895446777344, "loss": 0.059, "rewards/accuracies": 1.0, "rewards/chosen": 0.30367574095726013, "rewards/margins": 2.8331940174102783, "rewards/rejected": -2.5295181274414062, "step": 15000 }, { "epoch": 0.6, "eval_logits/chosen": -3.0708272457122803, "eval_logits/rejected": -3.0971832275390625, "eval_logps/chosen": -0.16389837861061096, "eval_logps/rejected": -284.7884216308594, "eval_loss": 0.05994411185383797, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149605393409729, "eval_rewards/margins": 2.8026890754699707, "eval_rewards/rejected": -2.4877285957336426, "eval_runtime": 2.5391, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 15000 }, { "epoch": 0.6, "learning_rate": 2.0624423525618097e-06, "logits/chosen": -2.993699550628662, "logits/rejected": -3.0245234966278076, "logps/chosen": -3.8271076679229736, "logps/rejected": -287.11090087890625, "loss": 0.0912, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.279020756483078, "rewards/margins": 2.796083688735962, "rewards/rejected": -2.5170629024505615, "step": 15010 }, { "epoch": 0.6, "learning_rate": 2.0590060023319696e-06, "logits/chosen": -3.0087382793426514, "logits/rejected": -3.0379600524902344, "logps/chosen": -4.181872844696045, "logps/rejected": -283.40167236328125, "loss": 0.0905, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27391138672828674, "rewards/margins": 2.7581443786621094, "rewards/rejected": -2.4842331409454346, "step": 15020 }, { "epoch": 0.6, "learning_rate": 2.055570511842493e-06, "logits/chosen": -3.022223711013794, "logits/rejected": -3.047619342803955, "logps/chosen": -7.569952487945557, "logps/rejected": -282.50103759765625, "loss": 0.1272, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23960654437541962, "rewards/margins": 2.715066909790039, "rewards/rejected": -2.4754602909088135, "step": 15030 }, { "epoch": 0.6, "learning_rate": 2.0521358877910446e-06, "logits/chosen": -3.024698495864868, "logits/rejected": -3.0546469688415527, "logps/chosen": -3.506005048751831, "logps/rejected": -287.91650390625, "loss": 0.0885, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2815207839012146, "rewards/margins": 2.8065738677978516, "rewards/rejected": -2.5250532627105713, "step": 15040 }, { "epoch": 0.6, "learning_rate": 2.0487021368736002e-06, "logits/chosen": -3.005819320678711, "logits/rejected": -3.0359346866607666, "logps/chosen": -4.237335205078125, "logps/rejected": -279.8788757324219, "loss": 0.1044, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27527305483818054, "rewards/margins": 2.721278667449951, "rewards/rejected": -2.446005344390869, "step": 15050 }, { "epoch": 0.6, "learning_rate": 2.0452692657844333e-06, "logits/chosen": -3.0444560050964355, "logits/rejected": -3.0720911026000977, "logps/chosen": -5.39161491394043, "logps/rejected": -287.3174743652344, "loss": 0.0924, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26575130224227905, "rewards/margins": 2.7775654792785645, "rewards/rejected": -2.5118141174316406, "step": 15060 }, { "epoch": 0.6, "learning_rate": 2.0418372812161015e-06, "logits/chosen": -3.026776075363159, "logits/rejected": -3.0565507411956787, "logps/chosen": -3.1431803703308105, "logps/rejected": -287.8318786621094, "loss": 0.0854, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28468650579452515, "rewards/margins": 2.8086040019989014, "rewards/rejected": -2.5239176750183105, "step": 15070 }, { "epoch": 0.6, "learning_rate": 2.0384061898594332e-06, "logits/chosen": -3.0337047576904297, "logits/rejected": -3.0621681213378906, "logps/chosen": -2.2377541065216064, "logps/rejected": -287.9972229003906, "loss": 0.0757, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2957156300544739, "rewards/margins": 2.8195180892944336, "rewards/rejected": -2.5238027572631836, "step": 15080 }, { "epoch": 0.6, "learning_rate": 2.034975998403517e-06, "logits/chosen": -3.0085606575012207, "logits/rejected": -3.037322521209717, "logps/chosen": -5.799045085906982, "logps/rejected": -281.3064880371094, "loss": 0.1083, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2582489252090454, "rewards/margins": 2.7159342765808105, "rewards/rejected": -2.4576854705810547, "step": 15090 }, { "epoch": 0.6, "learning_rate": 2.031546713535688e-06, "logits/chosen": -2.9986634254455566, "logits/rejected": -3.0292983055114746, "logps/chosen": -9.758810043334961, "logps/rejected": -278.3367614746094, "loss": 0.1524, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21993407607078552, "rewards/margins": 2.6484978199005127, "rewards/rejected": -2.4285638332366943, "step": 15100 }, { "epoch": 0.6, "eval_logits/chosen": -3.071758508682251, "eval_logits/rejected": -3.098630666732788, "eval_logps/chosen": -0.18897129595279694, "eval_logps/rejected": -284.6384582519531, "eval_loss": 0.06010209396481514, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147098124027252, "eval_rewards/margins": 2.800938367843628, "eval_rewards/rejected": -2.4862284660339355, "eval_runtime": 2.5431, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 15100 }, { "epoch": 0.6, "learning_rate": 2.0281183419415127e-06, "logits/chosen": -3.0305016040802, "logits/rejected": -3.058486223220825, "logps/chosen": -3.0856096744537354, "logps/rejected": -286.8599548339844, "loss": 0.0848, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28586244583129883, "rewards/margins": 2.799541473388672, "rewards/rejected": -2.513679027557373, "step": 15110 }, { "epoch": 0.6, "learning_rate": 2.0246908903047752e-06, "logits/chosen": -3.042780637741089, "logits/rejected": -3.0713226795196533, "logps/chosen": -3.0001282691955566, "logps/rejected": -287.5124816894531, "loss": 0.0827, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2874395251274109, "rewards/margins": 2.8115074634552, "rewards/rejected": -2.5240678787231445, "step": 15120 }, { "epoch": 0.61, "learning_rate": 2.0212643653074677e-06, "logits/chosen": -3.0367019176483154, "logits/rejected": -3.0630102157592773, "logps/chosen": -0.3122820258140564, "logps/rejected": -288.6756896972656, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": 0.31181755661964417, "rewards/margins": 2.8466851711273193, "rewards/rejected": -2.534867763519287, "step": 15130 }, { "epoch": 0.61, "learning_rate": 2.0178387736297774e-06, "logits/chosen": -3.046009063720703, "logits/rejected": -3.073789119720459, "logps/chosen": -0.3397195339202881, "logps/rejected": -289.85321044921875, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.311494916677475, "rewards/margins": 2.8564388751983643, "rewards/rejected": -2.5449440479278564, "step": 15140 }, { "epoch": 0.61, "learning_rate": 2.0144141219500707e-06, "logits/chosen": -3.025843620300293, "logits/rejected": -3.0549402236938477, "logps/chosen": -5.6986212730407715, "logps/rejected": -279.8172912597656, "loss": 0.1126, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2600864768028259, "rewards/margins": 2.702305316925049, "rewards/rejected": -2.442218780517578, "step": 15150 }, { "epoch": 0.61, "learning_rate": 2.01099041694488e-06, "logits/chosen": -3.0250658988952637, "logits/rejected": -3.0537455081939697, "logps/chosen": -3.69232177734375, "logps/rejected": -286.01678466796875, "loss": 0.0912, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2812601923942566, "rewards/margins": 2.7865729331970215, "rewards/rejected": -2.505312919616699, "step": 15160 }, { "epoch": 0.61, "learning_rate": 2.0075676652888937e-06, "logits/chosen": -3.0444388389587402, "logits/rejected": -3.073227882385254, "logps/chosen": -0.20074549317359924, "logps/rejected": -292.2315368652344, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 0.3127230405807495, "rewards/margins": 2.8854362964630127, "rewards/rejected": -2.5727131366729736, "step": 15170 }, { "epoch": 0.61, "learning_rate": 2.0041458736549423e-06, "logits/chosen": -3.013706684112549, "logits/rejected": -3.0409095287323, "logps/chosen": -3.3152859210968018, "logps/rejected": -289.86492919921875, "loss": 0.0848, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28272587060928345, "rewards/margins": 2.8300979137420654, "rewards/rejected": -2.5473721027374268, "step": 15180 }, { "epoch": 0.61, "learning_rate": 2.0007250487139827e-06, "logits/chosen": -3.02306866645813, "logits/rejected": -3.0496819019317627, "logps/chosen": -4.8646464347839355, "logps/rejected": -285.50872802734375, "loss": 0.1034, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2690771222114563, "rewards/margins": 2.7701191902160645, "rewards/rejected": -2.501042604446411, "step": 15190 }, { "epoch": 0.61, "learning_rate": 1.997305197135089e-06, "logits/chosen": -2.9876091480255127, "logits/rejected": -3.0201220512390137, "logps/chosen": -0.21264100074768066, "logps/rejected": -290.771728515625, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.31277433037757874, "rewards/margins": 2.8667309284210205, "rewards/rejected": -2.5539565086364746, "step": 15200 }, { "epoch": 0.61, "eval_logits/chosen": -3.0714709758758545, "eval_logits/rejected": -3.0971803665161133, "eval_logps/chosen": -0.16943609714508057, "eval_logps/rejected": -284.16888427734375, "eval_loss": 0.06052476167678833, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31490516662597656, "eval_rewards/margins": 2.7964377403259277, "eval_rewards/rejected": -2.481532573699951, "eval_runtime": 2.5349, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 15200 }, { "epoch": 0.61, "learning_rate": 1.9938863255854356e-06, "logits/chosen": -3.026113748550415, "logits/rejected": -3.0576207637786865, "logps/chosen": -3.703937530517578, "logps/rejected": -286.5873718261719, "loss": 0.0914, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28028756380081177, "rewards/margins": 2.792804479598999, "rewards/rejected": -2.512516975402832, "step": 15210 }, { "epoch": 0.61, "learning_rate": 1.990468440730288e-06, "logits/chosen": -3.0102415084838867, "logits/rejected": -3.042137622833252, "logps/chosen": -2.6700897216796875, "logps/rejected": -288.3897399902344, "loss": 0.0808, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29150518774986267, "rewards/margins": 2.8194997310638428, "rewards/rejected": -2.5279946327209473, "step": 15220 }, { "epoch": 0.61, "learning_rate": 1.987051549232988e-06, "logits/chosen": -3.0343661308288574, "logits/rejected": -3.0647130012512207, "logps/chosen": -0.20670142769813538, "logps/rejected": -291.54193115234375, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.3151089549064636, "rewards/margins": 2.8706183433532715, "rewards/rejected": -2.555509328842163, "step": 15230 }, { "epoch": 0.61, "learning_rate": 1.983635657754942e-06, "logits/chosen": -3.009268283843994, "logits/rejected": -3.0372719764709473, "logps/chosen": -3.625627040863037, "logps/rejected": -287.7968444824219, "loss": 0.0895, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28168582916259766, "rewards/margins": 2.804849147796631, "rewards/rejected": -2.523163318634033, "step": 15240 }, { "epoch": 0.61, "learning_rate": 1.9802207729556023e-06, "logits/chosen": -3.007096767425537, "logits/rejected": -3.03790545463562, "logps/chosen": -0.2145891934633255, "logps/rejected": -290.8239440917969, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3154705762863159, "rewards/margins": 2.8674893379211426, "rewards/rejected": -2.552018642425537, "step": 15250 }, { "epoch": 0.61, "learning_rate": 1.9768069014924622e-06, "logits/chosen": -3.012235641479492, "logits/rejected": -3.0416359901428223, "logps/chosen": -4.01229190826416, "logps/rejected": -285.6966552734375, "loss": 0.0926, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2762192189693451, "rewards/margins": 2.781932830810547, "rewards/rejected": -2.5057132244110107, "step": 15260 }, { "epoch": 0.61, "learning_rate": 1.97339405002104e-06, "logits/chosen": -3.0346550941467285, "logits/rejected": -3.0627615451812744, "logps/chosen": -2.25693678855896, "logps/rejected": -285.9831848144531, "loss": 0.0776, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2962599992752075, "rewards/margins": 2.799330472946167, "rewards/rejected": -2.50307035446167, "step": 15270 }, { "epoch": 0.61, "learning_rate": 1.969982225194864e-06, "logits/chosen": -3.0560226440429688, "logits/rejected": -3.0848355293273926, "logps/chosen": -0.19637687504291534, "logps/rejected": -291.2269592285156, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31665971875190735, "rewards/margins": 2.870231866836548, "rewards/rejected": -2.5535717010498047, "step": 15280 }, { "epoch": 0.61, "learning_rate": 1.9665714336654604e-06, "logits/chosen": -3.006828784942627, "logits/rejected": -3.03695011138916, "logps/chosen": -3.7397701740264893, "logps/rejected": -284.2499084472656, "loss": 0.095, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2791278660297394, "rewards/margins": 2.768376588821411, "rewards/rejected": -2.489248752593994, "step": 15290 }, { "epoch": 0.61, "learning_rate": 1.963161682082342e-06, "logits/chosen": -3.0247507095336914, "logits/rejected": -3.054560422897339, "logps/chosen": -3.633551836013794, "logps/rejected": -282.48101806640625, "loss": 0.0969, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2803553342819214, "rewards/margins": 2.749197244644165, "rewards/rejected": -2.468841791152954, "step": 15300 }, { "epoch": 0.61, "eval_logits/chosen": -3.070420026779175, "eval_logits/rejected": -3.0962963104248047, "eval_logps/chosen": -0.14823108911514282, "eval_logps/rejected": -284.62255859375, "eval_loss": 0.060067106038331985, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.315117210149765, "eval_rewards/margins": 2.801187038421631, "eval_rewards/rejected": -2.486069917678833, "eval_runtime": 2.5343, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 15300 }, { "epoch": 0.61, "learning_rate": 1.959752977092995e-06, "logits/chosen": -3.0117697715759277, "logits/rejected": -3.0422472953796387, "logps/chosen": -0.17929832637310028, "logps/rejected": -288.3736877441406, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": 0.3166620433330536, "rewards/margins": 2.8433618545532227, "rewards/rejected": -2.5266995429992676, "step": 15310 }, { "epoch": 0.61, "learning_rate": 1.956345325342863e-06, "logits/chosen": -3.0008606910705566, "logits/rejected": -3.0316953659057617, "logps/chosen": -6.210136413574219, "logps/rejected": -279.6673278808594, "loss": 0.1231, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2549701929092407, "rewards/margins": 2.694995880126953, "rewards/rejected": -2.440025568008423, "step": 15320 }, { "epoch": 0.61, "learning_rate": 1.9529387334753394e-06, "logits/chosen": -3.022608995437622, "logits/rejected": -3.047131061553955, "logps/chosen": -3.864163875579834, "logps/rejected": -287.3103942871094, "loss": 0.0918, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28114980459213257, "rewards/margins": 2.795135974884033, "rewards/rejected": -2.513986110687256, "step": 15330 }, { "epoch": 0.61, "learning_rate": 1.9495332081317466e-06, "logits/chosen": -3.026320695877075, "logits/rejected": -3.0549144744873047, "logps/chosen": -0.2026529759168625, "logps/rejected": -287.137451171875, "loss": 0.0608, "rewards/accuracies": 1.0, "rewards/chosen": 0.31428828835487366, "rewards/margins": 2.826875925064087, "rewards/rejected": -2.512587785720825, "step": 15340 }, { "epoch": 0.61, "learning_rate": 1.946128755951332e-06, "logits/chosen": -3.0084800720214844, "logits/rejected": -3.0409741401672363, "logps/chosen": -0.16394604742527008, "logps/rejected": -292.452392578125, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.31597501039505005, "rewards/margins": 2.881833076477051, "rewards/rejected": -2.5658583641052246, "step": 15350 }, { "epoch": 0.61, "learning_rate": 1.942725383571249e-06, "logits/chosen": -3.0330135822296143, "logits/rejected": -3.0587544441223145, "logps/chosen": -4.224160194396973, "logps/rejected": -286.222412109375, "loss": 0.0972, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27437201142311096, "rewards/margins": 2.782951831817627, "rewards/rejected": -2.508580446243286, "step": 15360 }, { "epoch": 0.61, "learning_rate": 1.9393230976265478e-06, "logits/chosen": -3.0124335289001465, "logits/rejected": -3.043729066848755, "logps/chosen": -0.18670156598091125, "logps/rejected": -292.18292236328125, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": 0.3160589635372162, "rewards/margins": 2.8868207931518555, "rewards/rejected": -2.5707621574401855, "step": 15370 }, { "epoch": 0.62, "learning_rate": 1.9359219047501563e-06, "logits/chosen": -3.0000393390655518, "logits/rejected": -3.033618211746216, "logps/chosen": -0.13016699254512787, "logps/rejected": -290.9527282714844, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3137974143028259, "rewards/margins": 2.868784189224243, "rewards/rejected": -2.5549864768981934, "step": 15380 }, { "epoch": 0.62, "learning_rate": 1.9325218115728756e-06, "logits/chosen": -3.0450103282928467, "logits/rejected": -3.0718331336975098, "logps/chosen": -3.101926803588867, "logps/rejected": -287.359130859375, "loss": 0.0851, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2844391465187073, "rewards/margins": 2.80696439743042, "rewards/rejected": -2.5225250720977783, "step": 15390 }, { "epoch": 0.62, "learning_rate": 1.9291228247233607e-06, "logits/chosen": -3.0331966876983643, "logits/rejected": -3.0635693073272705, "logps/chosen": -0.29304689168930054, "logps/rejected": -289.87139892578125, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.314389169216156, "rewards/margins": 2.853736639022827, "rewards/rejected": -2.5393471717834473, "step": 15400 }, { "epoch": 0.62, "eval_logits/chosen": -3.0718886852264404, "eval_logits/rejected": -3.097505807876587, "eval_logps/chosen": -0.15458881855010986, "eval_logps/rejected": -284.7789001464844, "eval_loss": 0.05998820811510086, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31505364179611206, "eval_rewards/margins": 2.802687168121338, "eval_rewards/rejected": -2.487633228302002, "eval_runtime": 2.538, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 15400 }, { "epoch": 0.62, "learning_rate": 1.9257249508281108e-06, "logits/chosen": -3.0316691398620605, "logits/rejected": -3.0590837001800537, "logps/chosen": -5.676621913909912, "logps/rejected": -282.56036376953125, "loss": 0.1124, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25813472270965576, "rewards/margins": 2.7299656867980957, "rewards/rejected": -2.4718308448791504, "step": 15410 }, { "epoch": 0.62, "learning_rate": 1.922328196511456e-06, "logits/chosen": -3.0274176597595215, "logits/rejected": -3.0559287071228027, "logps/chosen": -6.105881690979004, "logps/rejected": -284.33642578125, "loss": 0.1152, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25939053297042847, "rewards/margins": 2.7435965538024902, "rewards/rejected": -2.484205722808838, "step": 15420 }, { "epoch": 0.62, "learning_rate": 1.91893256839554e-06, "logits/chosen": -3.0102343559265137, "logits/rejected": -3.039541244506836, "logps/chosen": -0.18395015597343445, "logps/rejected": -286.0438232421875, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": 0.3116060197353363, "rewards/margins": 2.8224737644195557, "rewards/rejected": -2.5108675956726074, "step": 15430 }, { "epoch": 0.62, "learning_rate": 1.9155380731003163e-06, "logits/chosen": -3.00813627243042, "logits/rejected": -3.0399701595306396, "logps/chosen": -0.16808275878429413, "logps/rejected": -288.9375915527344, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.315356969833374, "rewards/margins": 2.8475019931793213, "rewards/rejected": -2.532144546508789, "step": 15440 }, { "epoch": 0.62, "learning_rate": 1.912144717243525e-06, "logits/chosen": -3.0136756896972656, "logits/rejected": -3.044985294342041, "logps/chosen": -0.31844907999038696, "logps/rejected": -290.0392150878906, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3134036064147949, "rewards/margins": 2.865011215209961, "rewards/rejected": -2.551607847213745, "step": 15450 }, { "epoch": 0.62, "learning_rate": 1.908752507440689e-06, "logits/chosen": -3.0156378746032715, "logits/rejected": -3.0444321632385254, "logps/chosen": -4.054943084716797, "logps/rejected": -285.38812255859375, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27646538615226746, "rewards/margins": 2.7735843658447266, "rewards/rejected": -2.4971189498901367, "step": 15460 }, { "epoch": 0.62, "learning_rate": 1.905361450305093e-06, "logits/chosen": -3.0397720336914062, "logits/rejected": -3.0699985027313232, "logps/chosen": -0.29954031109809875, "logps/rejected": -291.54864501953125, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31344491243362427, "rewards/margins": 2.8735501766204834, "rewards/rejected": -2.560105085372925, "step": 15470 }, { "epoch": 0.62, "learning_rate": 1.9019715524477769e-06, "logits/chosen": -3.027709484100342, "logits/rejected": -3.0533576011657715, "logps/chosen": -10.646566390991211, "logps/rejected": -271.85003662109375, "loss": 0.1671, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21017897129058838, "rewards/margins": 2.5766773223876953, "rewards/rejected": -2.3664984703063965, "step": 15480 }, { "epoch": 0.62, "learning_rate": 1.8985828204775206e-06, "logits/chosen": -3.0345571041107178, "logits/rejected": -3.0652389526367188, "logps/chosen": -0.23538437485694885, "logps/rejected": -289.2222595214844, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": 0.3126310408115387, "rewards/margins": 2.850895404815674, "rewards/rejected": -2.538264036178589, "step": 15490 }, { "epoch": 0.62, "learning_rate": 1.895195261000831e-06, "logits/chosen": -3.0317037105560303, "logits/rejected": -3.0604281425476074, "logps/chosen": -3.8996779918670654, "logps/rejected": -285.1097106933594, "loss": 0.0935, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2757294774055481, "rewards/margins": 2.7786123752593994, "rewards/rejected": -2.5028834342956543, "step": 15500 }, { "epoch": 0.62, "eval_logits/chosen": -3.069807291030884, "eval_logits/rejected": -3.0960605144500732, "eval_logps/chosen": -0.14882414042949677, "eval_logps/rejected": -284.39825439453125, "eval_loss": 0.06028341129422188, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31511127948760986, "eval_rewards/margins": 2.798938274383545, "eval_rewards/rejected": -2.4838268756866455, "eval_runtime": 2.5406, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 15500 }, { "epoch": 0.62, "learning_rate": 1.8918088806219279e-06, "logits/chosen": -3.0054807662963867, "logits/rejected": -3.036980152130127, "logps/chosen": -0.1763511598110199, "logps/rejected": -290.9180908203125, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.3169814944267273, "rewards/margins": 2.870753049850464, "rewards/rejected": -2.553771734237671, "step": 15510 }, { "epoch": 0.62, "learning_rate": 1.888423685942732e-06, "logits/chosen": -2.9939868450164795, "logits/rejected": -3.026340961456299, "logps/chosen": -0.2389327585697174, "logps/rejected": -290.5375671386719, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.31117361783981323, "rewards/margins": 2.864875316619873, "rewards/rejected": -2.553701877593994, "step": 15520 }, { "epoch": 0.62, "learning_rate": 1.885039683562855e-06, "logits/chosen": -3.0245003700256348, "logits/rejected": -3.0528950691223145, "logps/chosen": -6.52678918838501, "logps/rejected": -282.5793762207031, "loss": 0.1196, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2525028884410858, "rewards/margins": 2.7277474403381348, "rewards/rejected": -2.4752449989318848, "step": 15530 }, { "epoch": 0.62, "learning_rate": 1.8816568800795823e-06, "logits/chosen": -3.025254726409912, "logits/rejected": -3.0530645847320557, "logps/chosen": -3.873218536376953, "logps/rejected": -284.56768798828125, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2786978483200073, "rewards/margins": 2.7715044021606445, "rewards/rejected": -2.4928066730499268, "step": 15540 }, { "epoch": 0.62, "learning_rate": 1.8782752820878636e-06, "logits/chosen": -3.004891872406006, "logits/rejected": -3.03476619720459, "logps/chosen": -3.944148302078247, "logps/rejected": -283.24249267578125, "loss": 0.0952, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27818408608436584, "rewards/margins": 2.757171630859375, "rewards/rejected": -2.478987455368042, "step": 15550 }, { "epoch": 0.62, "learning_rate": 1.874894896180295e-06, "logits/chosen": -3.0069429874420166, "logits/rejected": -3.032675266265869, "logps/chosen": -9.45520305633545, "logps/rejected": -280.78570556640625, "loss": 0.1449, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22230803966522217, "rewards/margins": 2.6810388565063477, "rewards/rejected": -2.458730936050415, "step": 15560 }, { "epoch": 0.62, "learning_rate": 1.8715157289471132e-06, "logits/chosen": -3.0474941730499268, "logits/rejected": -3.076904296875, "logps/chosen": -0.18622103333473206, "logps/rejected": -291.8546142578125, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3159223198890686, "rewards/margins": 2.8761420249938965, "rewards/rejected": -2.5602192878723145, "step": 15570 }, { "epoch": 0.62, "learning_rate": 1.868137786976177e-06, "logits/chosen": -3.023500919342041, "logits/rejected": -3.0530686378479004, "logps/chosen": -1.255136251449585, "logps/rejected": -287.7996826171875, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": 0.30546313524246216, "rewards/margins": 2.8244643211364746, "rewards/rejected": -2.5190012454986572, "step": 15580 }, { "epoch": 0.62, "learning_rate": 1.8647610768529581e-06, "logits/chosen": -3.0134191513061523, "logits/rejected": -3.0449533462524414, "logps/chosen": -3.9786324501037598, "logps/rejected": -286.63543701171875, "loss": 0.0928, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.277859628200531, "rewards/margins": 2.7910256385803223, "rewards/rejected": -2.5131661891937256, "step": 15590 }, { "epoch": 0.62, "learning_rate": 1.8613856051605242e-06, "logits/chosen": -3.025489091873169, "logits/rejected": -3.056016445159912, "logps/chosen": -2.8311452865600586, "logps/rejected": -289.60504150390625, "loss": 0.0815, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28790083527565, "rewards/margins": 2.8266940116882324, "rewards/rejected": -2.5387935638427734, "step": 15600 }, { "epoch": 0.62, "eval_logits/chosen": -3.0710904598236084, "eval_logits/rejected": -3.09729266166687, "eval_logps/chosen": -0.22922053933143616, "eval_logps/rejected": -284.62872314453125, "eval_loss": 0.06017429754137993, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3143073320388794, "eval_rewards/margins": 2.8004391193389893, "eval_rewards/rejected": -2.4861319065093994, "eval_runtime": 2.5301, "eval_samples_per_second": 1.976, "eval_steps_per_second": 0.395, "step": 15600 }, { "epoch": 0.62, "learning_rate": 1.8580113784795306e-06, "logits/chosen": -3.0291342735290527, "logits/rejected": -3.056217908859253, "logps/chosen": -3.301896333694458, "logps/rejected": -283.60491943359375, "loss": 0.0865, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2834903299808502, "rewards/margins": 2.767256736755371, "rewards/rejected": -2.4837660789489746, "step": 15610 }, { "epoch": 0.62, "learning_rate": 1.854638403388206e-06, "logits/chosen": -3.0279555320739746, "logits/rejected": -3.0583956241607666, "logps/chosen": -0.16662637889385223, "logps/rejected": -292.4411926269531, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": 0.31523460149765015, "rewards/margins": 2.886324644088745, "rewards/rejected": -2.5710902214050293, "step": 15620 }, { "epoch": 0.63, "learning_rate": 1.8512666864623367e-06, "logits/chosen": -3.0117173194885254, "logits/rejected": -3.0407192707061768, "logps/chosen": -7.024845123291016, "logps/rejected": -282.10687255859375, "loss": 0.1236, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2462574988603592, "rewards/margins": 2.7137694358825684, "rewards/rejected": -2.4675118923187256, "step": 15630 }, { "epoch": 0.63, "learning_rate": 1.8478962342752584e-06, "logits/chosen": -3.030803918838501, "logits/rejected": -3.060227394104004, "logps/chosen": -0.7734983563423157, "logps/rejected": -289.86407470703125, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": 0.30897411704063416, "rewards/margins": 2.850019693374634, "rewards/rejected": -2.5410454273223877, "step": 15640 }, { "epoch": 0.63, "learning_rate": 1.8445270533978387e-06, "logits/chosen": -3.015355348587036, "logits/rejected": -3.0446860790252686, "logps/chosen": -0.6121145486831665, "logps/rejected": -290.10333251953125, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.3107401132583618, "rewards/margins": 2.856734037399292, "rewards/rejected": -2.5459938049316406, "step": 15650 }, { "epoch": 0.63, "learning_rate": 1.8411591503984687e-06, "logits/chosen": -3.007638931274414, "logits/rejected": -3.0354063510894775, "logps/chosen": -14.664067268371582, "logps/rejected": -278.6825256347656, "loss": 0.1976, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.17106366157531738, "rewards/margins": 2.6072731018066406, "rewards/rejected": -2.436209201812744, "step": 15660 }, { "epoch": 0.63, "learning_rate": 1.8377925318430478e-06, "logits/chosen": -3.0278069972991943, "logits/rejected": -3.0566906929016113, "logps/chosen": -3.0446066856384277, "logps/rejected": -287.622314453125, "loss": 0.0838, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28596577048301697, "rewards/margins": 2.8106801509857178, "rewards/rejected": -2.524714231491089, "step": 15670 }, { "epoch": 0.63, "learning_rate": 1.8344272042949724e-06, "logits/chosen": -3.01519775390625, "logits/rejected": -3.047365427017212, "logps/chosen": -2.705287218093872, "logps/rejected": -289.10650634765625, "loss": 0.0791, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2903751730918884, "rewards/margins": 2.8230347633361816, "rewards/rejected": -2.5326600074768066, "step": 15680 }, { "epoch": 0.63, "learning_rate": 1.8310631743151187e-06, "logits/chosen": -3.024202823638916, "logits/rejected": -3.0571835041046143, "logps/chosen": -0.1705324947834015, "logps/rejected": -291.2358093261719, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.31577762961387634, "rewards/margins": 2.8709750175476074, "rewards/rejected": -2.5551974773406982, "step": 15690 }, { "epoch": 0.63, "learning_rate": 1.827700448461836e-06, "logits/chosen": -2.999006986618042, "logits/rejected": -3.0326218605041504, "logps/chosen": -0.19870756566524506, "logps/rejected": -290.8468017578125, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3139212727546692, "rewards/margins": 2.8693902492523193, "rewards/rejected": -2.555469036102295, "step": 15700 }, { "epoch": 0.63, "eval_logits/chosen": -3.0709102153778076, "eval_logits/rejected": -3.0971016883850098, "eval_logps/chosen": -0.18164129555225372, "eval_logps/rejected": -284.3312072753906, "eval_loss": 0.060334932059049606, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31478312611579895, "eval_rewards/margins": 2.7979393005371094, "eval_rewards/rejected": -2.483156204223633, "eval_runtime": 2.5361, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 15700 }, { "epoch": 0.63, "learning_rate": 1.8243390332909305e-06, "logits/chosen": -3.0255649089813232, "logits/rejected": -3.0543129444122314, "logps/chosen": -0.18652990460395813, "logps/rejected": -293.30572509765625, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": 0.3168758749961853, "rewards/margins": 2.8929173946380615, "rewards/rejected": -2.5760416984558105, "step": 15710 }, { "epoch": 0.63, "learning_rate": 1.8209789353556528e-06, "logits/chosen": -3.0177128314971924, "logits/rejected": -3.0465502738952637, "logps/chosen": -4.4032487869262695, "logps/rejected": -283.75677490234375, "loss": 0.1021, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.272812157869339, "rewards/margins": 2.755575656890869, "rewards/rejected": -2.4827635288238525, "step": 15720 }, { "epoch": 0.63, "learning_rate": 1.8176201612066874e-06, "logits/chosen": -3.0117039680480957, "logits/rejected": -3.0417096614837646, "logps/chosen": -3.5866539478302, "logps/rejected": -286.55889892578125, "loss": 0.0892, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28012388944625854, "rewards/margins": 2.792564630508423, "rewards/rejected": -2.5124409198760986, "step": 15730 }, { "epoch": 0.63, "learning_rate": 1.814262717392134e-06, "logits/chosen": -3.0097317695617676, "logits/rejected": -3.0398573875427246, "logps/chosen": -2.3446342945098877, "logps/rejected": -287.704833984375, "loss": 0.075, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2926589250564575, "rewards/margins": 2.814592123031616, "rewards/rejected": -2.5219333171844482, "step": 15740 }, { "epoch": 0.63, "learning_rate": 1.8109066104575023e-06, "logits/chosen": -3.036864757537842, "logits/rejected": -3.065692663192749, "logps/chosen": -0.17706963419914246, "logps/rejected": -292.01727294921875, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.31493136286735535, "rewards/margins": 2.8780410289764404, "rewards/rejected": -2.563109874725342, "step": 15750 }, { "epoch": 0.63, "learning_rate": 1.8075518469456944e-06, "logits/chosen": -3.0193209648132324, "logits/rejected": -3.048807144165039, "logps/chosen": -6.759845733642578, "logps/rejected": -283.40869140625, "loss": 0.121, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2505294680595398, "rewards/margins": 2.734851360321045, "rewards/rejected": -2.4843220710754395, "step": 15760 }, { "epoch": 0.63, "learning_rate": 1.804198433396994e-06, "logits/chosen": -3.0144705772399902, "logits/rejected": -3.0455212593078613, "logps/chosen": -0.17160272598266602, "logps/rejected": -293.19000244140625, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": 0.3157275915145874, "rewards/margins": 2.89023494720459, "rewards/rejected": -2.574507474899292, "step": 15770 }, { "epoch": 0.63, "learning_rate": 1.8008463763490507e-06, "logits/chosen": -2.9983880519866943, "logits/rejected": -3.0291786193847656, "logps/chosen": -6.306177616119385, "logps/rejected": -284.04168701171875, "loss": 0.1158, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2537180781364441, "rewards/margins": 2.7405753135681152, "rewards/rejected": -2.4868569374084473, "step": 15780 }, { "epoch": 0.63, "learning_rate": 1.7974956823368728e-06, "logits/chosen": -3.0004160404205322, "logits/rejected": -3.0315139293670654, "logps/chosen": -4.748826503753662, "logps/rejected": -280.4735107421875, "loss": 0.1048, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26745349168777466, "rewards/margins": 2.717998504638672, "rewards/rejected": -2.450544834136963, "step": 15790 }, { "epoch": 0.63, "learning_rate": 1.7941463578928088e-06, "logits/chosen": -3.030427932739258, "logits/rejected": -3.059760570526123, "logps/chosen": -0.2590544819831848, "logps/rejected": -290.5874938964844, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.3120112419128418, "rewards/margins": 2.8660848140716553, "rewards/rejected": -2.5540740489959717, "step": 15800 }, { "epoch": 0.63, "eval_logits/chosen": -3.0703752040863037, "eval_logits/rejected": -3.0964303016662598, "eval_logps/chosen": -0.19321753084659576, "eval_logps/rejected": -283.9676513671875, "eval_loss": 0.060698963701725006, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31466737389564514, "eval_rewards/margins": 2.7941882610321045, "eval_rewards/rejected": -2.479520797729492, "eval_runtime": 2.5516, "eval_samples_per_second": 1.96, "eval_steps_per_second": 0.392, "step": 15800 }, { "epoch": 0.63, "learning_rate": 1.7907984095465397e-06, "logits/chosen": -3.016601085662842, "logits/rejected": -3.0456271171569824, "logps/chosen": -7.460234642028809, "logps/rejected": -282.86761474609375, "loss": 0.1289, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24332384765148163, "rewards/margins": 2.718212127685547, "rewards/rejected": -2.474888324737549, "step": 15810 }, { "epoch": 0.63, "learning_rate": 1.7874518438250598e-06, "logits/chosen": -3.0409467220306396, "logits/rejected": -3.070237398147583, "logps/chosen": -0.2130649834871292, "logps/rejected": -291.40484619140625, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3135736286640167, "rewards/margins": 2.8772928714752197, "rewards/rejected": -2.5637190341949463, "step": 15820 }, { "epoch": 0.63, "learning_rate": 1.7841066672526724e-06, "logits/chosen": -3.021223783493042, "logits/rejected": -3.049182891845703, "logps/chosen": -7.710000038146973, "logps/rejected": -282.40582275390625, "loss": 0.1294, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23931238055229187, "rewards/margins": 2.7145023345947266, "rewards/rejected": -2.4751899242401123, "step": 15830 }, { "epoch": 0.63, "learning_rate": 1.7807628863509685e-06, "logits/chosen": -3.0130774974823, "logits/rejected": -3.043914318084717, "logps/chosen": -3.8897242546081543, "logps/rejected": -285.43011474609375, "loss": 0.0875, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27673619985580444, "rewards/margins": 2.7737677097320557, "rewards/rejected": -2.4970316886901855, "step": 15840 }, { "epoch": 0.63, "learning_rate": 1.7774205076388207e-06, "logits/chosen": -3.0210623741149902, "logits/rejected": -3.051354169845581, "logps/chosen": -0.17015349864959717, "logps/rejected": -289.0009765625, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.31474894285202026, "rewards/margins": 2.8470845222473145, "rewards/rejected": -2.5323357582092285, "step": 15850 }, { "epoch": 0.63, "learning_rate": 1.774079537632369e-06, "logits/chosen": -3.02409291267395, "logits/rejected": -3.0544040203094482, "logps/chosen": -0.32130882143974304, "logps/rejected": -289.1429748535156, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.3139004707336426, "rewards/margins": 2.8514270782470703, "rewards/rejected": -2.5375266075134277, "step": 15860 }, { "epoch": 0.63, "learning_rate": 1.7707399828450028e-06, "logits/chosen": -2.998532772064209, "logits/rejected": -3.028221368789673, "logps/chosen": -4.830090522766113, "logps/rejected": -287.50543212890625, "loss": 0.0946, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2692040205001831, "rewards/margins": 2.7926862239837646, "rewards/rejected": -2.523482084274292, "step": 15870 }, { "epoch": 0.64, "learning_rate": 1.7674018497873568e-06, "logits/chosen": -3.032966375350952, "logits/rejected": -3.0633511543273926, "logps/chosen": -0.1864352822303772, "logps/rejected": -293.4407653808594, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 0.3156728744506836, "rewards/margins": 2.8941991329193115, "rewards/rejected": -2.578526735305786, "step": 15880 }, { "epoch": 0.64, "learning_rate": 1.7640651449672913e-06, "logits/chosen": -3.0231428146362305, "logits/rejected": -3.051917314529419, "logps/chosen": -3.872304916381836, "logps/rejected": -288.16412353515625, "loss": 0.0914, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27762532234191895, "rewards/margins": 2.806708812713623, "rewards/rejected": -2.529083251953125, "step": 15890 }, { "epoch": 0.64, "learning_rate": 1.7607298748898844e-06, "logits/chosen": -3.0284018516540527, "logits/rejected": -3.058696746826172, "logps/chosen": -3.401154041290283, "logps/rejected": -286.50830078125, "loss": 0.0871, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2846538722515106, "rewards/margins": 2.7915139198303223, "rewards/rejected": -2.5068600177764893, "step": 15900 }, { "epoch": 0.64, "eval_logits/chosen": -3.0718460083007812, "eval_logits/rejected": -3.096666097640991, "eval_logps/chosen": -0.17925696074962616, "eval_logps/rejected": -284.0538330078125, "eval_loss": 0.060597874224185944, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148069381713867, "eval_rewards/margins": 2.795189380645752, "eval_rewards/rejected": -2.4803826808929443, "eval_runtime": 2.5407, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 15900 }, { "epoch": 0.64, "learning_rate": 1.7573960460574133e-06, "logits/chosen": -3.016036033630371, "logits/rejected": -3.04487681388855, "logps/chosen": -0.3340376615524292, "logps/rejected": -290.6935119628906, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.31411775946617126, "rewards/margins": 2.8632094860076904, "rewards/rejected": -2.5490915775299072, "step": 15910 }, { "epoch": 0.64, "learning_rate": 1.7540636649693496e-06, "logits/chosen": -3.0038769245147705, "logits/rejected": -3.035766124725342, "logps/chosen": -0.19359010457992554, "logps/rejected": -292.46514892578125, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3173004686832428, "rewards/margins": 2.8812222480773926, "rewards/rejected": -2.5639216899871826, "step": 15920 }, { "epoch": 0.64, "learning_rate": 1.7507327381223406e-06, "logits/chosen": -3.014031410217285, "logits/rejected": -3.0441207885742188, "logps/chosen": -0.33660516142845154, "logps/rejected": -289.0889892578125, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.3156014680862427, "rewards/margins": 2.845364570617676, "rewards/rejected": -2.5297627449035645, "step": 15930 }, { "epoch": 0.64, "learning_rate": 1.7474032720101991e-06, "logits/chosen": -3.019645929336548, "logits/rejected": -3.0495784282684326, "logps/chosen": -6.815450191497803, "logps/rejected": -283.6018371582031, "loss": 0.1215, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24828767776489258, "rewards/margins": 2.734980821609497, "rewards/rejected": -2.4866929054260254, "step": 15940 }, { "epoch": 0.64, "learning_rate": 1.744075273123889e-06, "logits/chosen": -3.016434907913208, "logits/rejected": -3.0466690063476562, "logps/chosen": -0.2415396273136139, "logps/rejected": -290.5325622558594, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3129243850708008, "rewards/margins": 2.86484956741333, "rewards/rejected": -2.55192494392395, "step": 15950 }, { "epoch": 0.64, "learning_rate": 1.7407487479515147e-06, "logits/chosen": -3.0017032623291016, "logits/rejected": -3.0323944091796875, "logps/chosen": -3.553964138031006, "logps/rejected": -288.51617431640625, "loss": 0.0889, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28377586603164673, "rewards/margins": 2.8110568523406982, "rewards/rejected": -2.5272810459136963, "step": 15960 }, { "epoch": 0.64, "learning_rate": 1.7374237029783064e-06, "logits/chosen": -3.0349671840667725, "logits/rejected": -3.062655210494995, "logps/chosen": -3.821338653564453, "logps/rejected": -287.8187255859375, "loss": 0.091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28008607029914856, "rewards/margins": 2.802619218826294, "rewards/rejected": -2.5225331783294678, "step": 15970 }, { "epoch": 0.64, "learning_rate": 1.7341001446866101e-06, "logits/chosen": -3.005521297454834, "logits/rejected": -3.035811185836792, "logps/chosen": -0.21872425079345703, "logps/rejected": -291.6434631347656, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138793110847473, "rewards/margins": 2.8782453536987305, "rewards/rejected": -2.564365863800049, "step": 15980 }, { "epoch": 0.64, "learning_rate": 1.7307780795558743e-06, "logits/chosen": -3.0069987773895264, "logits/rejected": -3.036048173904419, "logps/chosen": -2.3424124717712402, "logps/rejected": -289.9413146972656, "loss": 0.0746, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29290157556533813, "rewards/margins": 2.839643955230713, "rewards/rejected": -2.5467422008514404, "step": 15990 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -2.99804425239563, "logits/rejected": -3.0284156799316406, "logps/chosen": -0.22589509189128876, "logps/rejected": -291.39984130859375, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.31521540880203247, "rewards/margins": 2.876246929168701, "rewards/rejected": -2.5610315799713135, "step": 16000 }, { "epoch": 0.64, "eval_logits/chosen": -3.069883346557617, "eval_logits/rejected": -3.0956602096557617, "eval_logps/chosen": -0.20038136839866638, "eval_logps/rejected": -283.9373779296875, "eval_loss": 0.060714103281497955, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3145957291126251, "eval_rewards/margins": 2.793813467025757, "eval_rewards/rejected": -2.479218006134033, "eval_runtime": 2.5385, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 16000 }, { "epoch": 0.64, "learning_rate": 1.7241384546804973e-06, "logits/chosen": -3.0129895210266113, "logits/rejected": -3.0406336784362793, "logps/chosen": -9.637168884277344, "logps/rejected": -280.51251220703125, "loss": 0.1499, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21950700879096985, "rewards/margins": 2.6741931438446045, "rewards/rejected": -2.4546866416931152, "step": 16010 }, { "epoch": 0.64, "learning_rate": 1.7208209078801454e-06, "logits/chosen": -3.016028642654419, "logits/rejected": -3.045029401779175, "logps/chosen": -0.1649365872144699, "logps/rejected": -293.1739807128906, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 0.3164282441139221, "rewards/margins": 2.894798994064331, "rewards/rejected": -2.5783705711364746, "step": 16020 }, { "epoch": 0.64, "learning_rate": 1.7175048801293042e-06, "logits/chosen": -3.0120184421539307, "logits/rejected": -3.0398831367492676, "logps/chosen": -3.3983356952667236, "logps/rejected": -285.5310974121094, "loss": 0.0903, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2838529944419861, "rewards/margins": 2.7843518257141113, "rewards/rejected": -2.5004987716674805, "step": 16030 }, { "epoch": 0.64, "learning_rate": 1.7141903778927407e-06, "logits/chosen": -3.013646125793457, "logits/rejected": -3.0413050651550293, "logps/chosen": -0.18508361279964447, "logps/rejected": -292.19256591796875, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3160596787929535, "rewards/margins": 2.881603240966797, "rewards/rejected": -2.5655436515808105, "step": 16040 }, { "epoch": 0.64, "learning_rate": 1.7108774076322443e-06, "logits/chosen": -3.026616096496582, "logits/rejected": -3.0555307865142822, "logps/chosen": -1.9927231073379517, "logps/rejected": -287.833740234375, "loss": 0.0716, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29709392786026, "rewards/margins": 2.8190274238586426, "rewards/rejected": -2.5219335556030273, "step": 16050 }, { "epoch": 0.64, "learning_rate": 1.7075659758066207e-06, "logits/chosen": -2.988856792449951, "logits/rejected": -3.0208017826080322, "logps/chosen": -0.8347099423408508, "logps/rejected": -286.50115966796875, "loss": 0.0663, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30842477083206177, "rewards/margins": 2.8189425468444824, "rewards/rejected": -2.5105175971984863, "step": 16060 }, { "epoch": 0.64, "learning_rate": 1.7042560888716766e-06, "logits/chosen": -2.997617244720459, "logits/rejected": -3.0283656120300293, "logps/chosen": -4.804306507110596, "logps/rejected": -283.7452087402344, "loss": 0.1044, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2672671675682068, "rewards/margins": 2.753904342651367, "rewards/rejected": -2.4866368770599365, "step": 16070 }, { "epoch": 0.64, "learning_rate": 1.7009477532802055e-06, "logits/chosen": -3.0187594890594482, "logits/rejected": -3.0477755069732666, "logps/chosen": -0.1697317659854889, "logps/rejected": -290.9215393066406, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3151927590370178, "rewards/margins": 2.868006467819214, "rewards/rejected": -2.55281400680542, "step": 16080 }, { "epoch": 0.64, "learning_rate": 1.6976409754819767e-06, "logits/chosen": -3.007870674133301, "logits/rejected": -3.0388622283935547, "logps/chosen": -0.15752090513706207, "logps/rejected": -291.31243896484375, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.315120130777359, "rewards/margins": 2.870250701904297, "rewards/rejected": -2.5551304817199707, "step": 16090 }, { "epoch": 0.64, "learning_rate": 1.6943357619237227e-06, "logits/chosen": -3.0175793170928955, "logits/rejected": -3.046970844268799, "logps/chosen": -6.813069820404053, "logps/rejected": -281.91253662109375, "loss": 0.1236, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2494432032108307, "rewards/margins": 2.7135491371154785, "rewards/rejected": -2.4641058444976807, "step": 16100 }, { "epoch": 0.64, "eval_logits/chosen": -3.071242332458496, "eval_logits/rejected": -3.098142147064209, "eval_logps/chosen": -0.16569048166275024, "eval_logps/rejected": -284.06512451171875, "eval_loss": 0.0606086440384388, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149426579475403, "eval_rewards/margins": 2.795438289642334, "eval_rewards/rejected": -2.4804954528808594, "eval_runtime": 2.538, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 16100 }, { "epoch": 0.64, "learning_rate": 1.6910321190491264e-06, "logits/chosen": -3.02187442779541, "logits/rejected": -3.050781726837158, "logps/chosen": -4.139871597290039, "logps/rejected": -285.2140197753906, "loss": 0.0942, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2748285233974457, "rewards/margins": 2.7724013328552246, "rewards/rejected": -2.497572422027588, "step": 16110 }, { "epoch": 0.64, "learning_rate": 1.6877300532988095e-06, "logits/chosen": -3.0007362365722656, "logits/rejected": -3.031184196472168, "logps/chosen": -4.7048516273498535, "logps/rejected": -286.4099426269531, "loss": 0.094, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2699268162250519, "rewards/margins": 2.778393268585205, "rewards/rejected": -2.508466958999634, "step": 16120 }, { "epoch": 0.65, "learning_rate": 1.6844295711103167e-06, "logits/chosen": -3.0378174781799316, "logits/rejected": -3.065051794052124, "logps/chosen": -1.788891077041626, "logps/rejected": -288.93145751953125, "loss": 0.0732, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.299090713262558, "rewards/margins": 2.8331706523895264, "rewards/rejected": -2.5340800285339355, "step": 16130 }, { "epoch": 0.65, "learning_rate": 1.6811306789181081e-06, "logits/chosen": -3.0200634002685547, "logits/rejected": -3.0479912757873535, "logps/chosen": -2.0517420768737793, "logps/rejected": -290.37823486328125, "loss": 0.0627, "rewards/accuracies": 1.0, "rewards/chosen": 0.29899340867996216, "rewards/margins": 2.843851327896118, "rewards/rejected": -2.54485821723938, "step": 16140 }, { "epoch": 0.65, "learning_rate": 1.677833383153542e-06, "logits/chosen": -3.0303454399108887, "logits/rejected": -3.0583391189575195, "logps/chosen": -2.505300998687744, "logps/rejected": -285.63922119140625, "loss": 0.0805, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.290775865316391, "rewards/margins": 2.7934131622314453, "rewards/rejected": -2.5026373863220215, "step": 16150 }, { "epoch": 0.65, "learning_rate": 1.6745376902448657e-06, "logits/chosen": -3.016399383544922, "logits/rejected": -3.0477187633514404, "logps/chosen": -3.0198912620544434, "logps/rejected": -288.5413513183594, "loss": 0.0839, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28850823640823364, "rewards/margins": 2.815577983856201, "rewards/rejected": -2.5270700454711914, "step": 16160 }, { "epoch": 0.65, "learning_rate": 1.6712436066172022e-06, "logits/chosen": -3.0199151039123535, "logits/rejected": -3.050656795501709, "logps/chosen": -0.19677314162254333, "logps/rejected": -292.94635009765625, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.31586959958076477, "rewards/margins": 2.8892791271209717, "rewards/rejected": -2.57340931892395, "step": 16170 }, { "epoch": 0.65, "learning_rate": 1.6679511386925337e-06, "logits/chosen": -2.9921069145202637, "logits/rejected": -3.0229053497314453, "logps/chosen": -7.134033203125, "logps/rejected": -278.7083435058594, "loss": 0.1284, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24546131491661072, "rewards/margins": 2.6815943717956543, "rewards/rejected": -2.4361329078674316, "step": 16180 }, { "epoch": 0.65, "learning_rate": 1.6646602928896962e-06, "logits/chosen": -3.01045298576355, "logits/rejected": -3.0432512760162354, "logps/chosen": -0.1954454779624939, "logps/rejected": -287.2496032714844, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": 0.3141838014125824, "rewards/margins": 2.8340704441070557, "rewards/rejected": -2.5198864936828613, "step": 16190 }, { "epoch": 0.65, "learning_rate": 1.661371075624363e-06, "logits/chosen": -3.021578550338745, "logits/rejected": -3.0505926609039307, "logps/chosen": -2.2983338832855225, "logps/rejected": -287.8603515625, "loss": 0.0768, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2936291992664337, "rewards/margins": 2.8169479370117188, "rewards/rejected": -2.5233187675476074, "step": 16200 }, { "epoch": 0.65, "eval_logits/chosen": -3.0697882175445557, "eval_logits/rejected": -3.096447706222534, "eval_logps/chosen": -0.14493978023529053, "eval_logps/rejected": -284.19439697265625, "eval_loss": 0.06048208475112915, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31515011191368103, "eval_rewards/margins": 2.796938419342041, "eval_rewards/rejected": -2.481788158416748, "eval_runtime": 2.5395, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 16200 }, { "epoch": 0.65, "learning_rate": 1.65808349330903e-06, "logits/chosen": -3.029789447784424, "logits/rejected": -3.0596954822540283, "logps/chosen": -0.9827854037284851, "logps/rejected": -288.1858215332031, "loss": 0.065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3073241114616394, "rewards/margins": 2.8327250480651855, "rewards/rejected": -2.5254008769989014, "step": 16210 }, { "epoch": 0.65, "learning_rate": 1.6547975523530074e-06, "logits/chosen": -3.027160167694092, "logits/rejected": -3.05775785446167, "logps/chosen": -1.498854160308838, "logps/rejected": -286.8951110839844, "loss": 0.0707, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30079230666160583, "rewards/margins": 2.8150172233581543, "rewards/rejected": -2.5142250061035156, "step": 16220 }, { "epoch": 0.65, "learning_rate": 1.651513259162405e-06, "logits/chosen": -3.014577865600586, "logits/rejected": -3.0457208156585693, "logps/chosen": -3.7812283039093018, "logps/rejected": -287.3572692871094, "loss": 0.0917, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27966177463531494, "rewards/margins": 2.795984983444214, "rewards/rejected": -2.5163230895996094, "step": 16230 }, { "epoch": 0.65, "learning_rate": 1.6482306201401211e-06, "logits/chosen": -3.039668321609497, "logits/rejected": -3.0673046112060547, "logps/chosen": -0.22881916165351868, "logps/rejected": -292.32293701171875, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.31308960914611816, "rewards/margins": 2.884754180908203, "rewards/rejected": -2.571664333343506, "step": 16240 }, { "epoch": 0.65, "learning_rate": 1.6449496416858285e-06, "logits/chosen": -3.0218489170074463, "logits/rejected": -3.0523488521575928, "logps/chosen": -6.423367500305176, "logps/rejected": -283.92596435546875, "loss": 0.1175, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25381556153297424, "rewards/margins": 2.737182378768921, "rewards/rejected": -2.4833669662475586, "step": 16250 }, { "epoch": 0.65, "learning_rate": 1.6416703301959622e-06, "logits/chosen": -3.0042667388916016, "logits/rejected": -3.0358738899230957, "logps/chosen": -3.808058500289917, "logps/rejected": -288.5841979980469, "loss": 0.0911, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2787301242351532, "rewards/margins": 2.8108363151550293, "rewards/rejected": -2.5321059226989746, "step": 16260 }, { "epoch": 0.65, "learning_rate": 1.6383926920637077e-06, "logits/chosen": -3.024531841278076, "logits/rejected": -3.0559494495391846, "logps/chosen": -3.8167495727539062, "logps/rejected": -286.3826599121094, "loss": 0.0921, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27956536412239075, "rewards/margins": 2.789175510406494, "rewards/rejected": -2.509610652923584, "step": 16270 }, { "epoch": 0.65, "learning_rate": 1.6351167336789882e-06, "logits/chosen": -3.008134126663208, "logits/rejected": -3.038771152496338, "logps/chosen": -0.18017248809337616, "logps/rejected": -290.6046447753906, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152341842651367, "rewards/margins": 2.8623671531677246, "rewards/rejected": -2.547132968902588, "step": 16280 }, { "epoch": 0.65, "learning_rate": 1.6318424614284525e-06, "logits/chosen": -3.020630359649658, "logits/rejected": -3.0506246089935303, "logps/chosen": -0.27929431200027466, "logps/rejected": -289.6347351074219, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.31263965368270874, "rewards/margins": 2.855104446411133, "rewards/rejected": -2.5424647331237793, "step": 16290 }, { "epoch": 0.65, "learning_rate": 1.6285698816954626e-06, "logits/chosen": -3.0266330242156982, "logits/rejected": -3.054236888885498, "logps/chosen": -3.8802623748779297, "logps/rejected": -285.7914733886719, "loss": 0.0923, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27864986658096313, "rewards/margins": 2.783613681793213, "rewards/rejected": -2.5049636363983154, "step": 16300 }, { "epoch": 0.65, "eval_logits/chosen": -3.0711021423339844, "eval_logits/rejected": -3.0964062213897705, "eval_logps/chosen": -0.18515048921108246, "eval_logps/rejected": -284.1095886230469, "eval_loss": 0.06060536578297615, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31474804878234863, "eval_rewards/margins": 2.7956881523132324, "eval_rewards/rejected": -2.480940341949463, "eval_runtime": 2.5409, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 16300 }, { "epoch": 0.65, "learning_rate": 1.6252990008600782e-06, "logits/chosen": -2.9943947792053223, "logits/rejected": -3.0219404697418213, "logps/chosen": -0.2027575969696045, "logps/rejected": -291.050048828125, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31394585967063904, "rewards/margins": 2.8713269233703613, "rewards/rejected": -2.5573811531066895, "step": 16310 }, { "epoch": 0.65, "learning_rate": 1.6220298252990502e-06, "logits/chosen": -3.020808458328247, "logits/rejected": -3.050124168395996, "logps/chosen": -8.564538955688477, "logps/rejected": -279.81793212890625, "loss": 0.136, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23334869742393494, "rewards/margins": 2.6752982139587402, "rewards/rejected": -2.4419496059417725, "step": 16320 }, { "epoch": 0.65, "learning_rate": 1.6187623613858038e-06, "logits/chosen": -2.998875141143799, "logits/rejected": -3.0299320220947266, "logps/chosen": -3.623887538909912, "logps/rejected": -289.8062744140625, "loss": 0.0884, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2817783057689667, "rewards/margins": 2.822922468185425, "rewards/rejected": -2.541144609451294, "step": 16330 }, { "epoch": 0.65, "learning_rate": 1.6154966154904265e-06, "logits/chosen": -2.9893569946289062, "logits/rejected": -3.0191709995269775, "logps/chosen": -2.710435152053833, "logps/rejected": -287.29248046875, "loss": 0.08, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2895277142524719, "rewards/margins": 2.8107831478118896, "rewards/rejected": -2.5212552547454834, "step": 16340 }, { "epoch": 0.65, "learning_rate": 1.612232593979658e-06, "logits/chosen": -3.010054111480713, "logits/rejected": -3.0407519340515137, "logps/chosen": -0.2605161666870117, "logps/rejected": -289.29486083984375, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.31457167863845825, "rewards/margins": 2.8518338203430176, "rewards/rejected": -2.537262201309204, "step": 16350 }, { "epoch": 0.65, "learning_rate": 1.6089703032168736e-06, "logits/chosen": -3.002986192703247, "logits/rejected": -3.0319409370422363, "logps/chosen": -5.998864650726318, "logps/rejected": -285.146240234375, "loss": 0.1128, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2563631534576416, "rewards/margins": 2.758862018585205, "rewards/rejected": -2.5024986267089844, "step": 16360 }, { "epoch": 0.65, "learning_rate": 1.605709749562077e-06, "logits/chosen": -3.0193517208099365, "logits/rejected": -3.0479636192321777, "logps/chosen": -0.2203955203294754, "logps/rejected": -292.0693054199219, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.3176235854625702, "rewards/margins": 2.8787410259246826, "rewards/rejected": -2.561117649078369, "step": 16370 }, { "epoch": 0.66, "learning_rate": 1.6024509393718847e-06, "logits/chosen": -3.015592575073242, "logits/rejected": -3.0450539588928223, "logps/chosen": -3.297527313232422, "logps/rejected": -287.2058410644531, "loss": 0.0867, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28415340185165405, "rewards/margins": 2.800589084625244, "rewards/rejected": -2.5164356231689453, "step": 16380 }, { "epoch": 0.66, "learning_rate": 1.5991938789995138e-06, "logits/chosen": -3.0342297554016113, "logits/rejected": -3.0614824295043945, "logps/chosen": -0.19027046859264374, "logps/rejected": -290.4726867675781, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.31373053789138794, "rewards/margins": 2.863480806350708, "rewards/rejected": -2.5497498512268066, "step": 16390 }, { "epoch": 0.66, "learning_rate": 1.5959385747947697e-06, "logits/chosen": -2.998467206954956, "logits/rejected": -3.026881694793701, "logps/chosen": -4.35853385925293, "logps/rejected": -283.05938720703125, "loss": 0.0956, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.270334929227829, "rewards/margins": 2.750171661376953, "rewards/rejected": -2.4798367023468018, "step": 16400 }, { "epoch": 0.66, "eval_logits/chosen": -3.0699660778045654, "eval_logits/rejected": -3.0961413383483887, "eval_logps/chosen": -0.15517470240592957, "eval_logps/rejected": -284.12969970703125, "eval_loss": 0.060578517615795135, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150478005409241, "eval_rewards/margins": 2.796189069747925, "eval_rewards/rejected": -2.4811413288116455, "eval_runtime": 2.5476, "eval_samples_per_second": 1.963, "eval_steps_per_second": 0.393, "step": 16400 }, { "epoch": 0.66, "learning_rate": 1.5926850331040345e-06, "logits/chosen": -3.011420488357544, "logits/rejected": -3.0417890548706055, "logps/chosen": -0.8807978630065918, "logps/rejected": -290.221923828125, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": 0.3080647587776184, "rewards/margins": 2.8572638034820557, "rewards/rejected": -2.549199342727661, "step": 16410 }, { "epoch": 0.66, "learning_rate": 1.5894332602702545e-06, "logits/chosen": -3.038438558578491, "logits/rejected": -3.0653135776519775, "logps/chosen": -0.8092816472053528, "logps/rejected": -286.12457275390625, "loss": 0.0601, "rewards/accuracies": 1.0, "rewards/chosen": 0.3089962601661682, "rewards/margins": 2.8155109882354736, "rewards/rejected": -2.5065150260925293, "step": 16420 }, { "epoch": 0.66, "learning_rate": 1.5861832626329282e-06, "logits/chosen": -2.9955971240997314, "logits/rejected": -3.024306058883667, "logps/chosen": -3.3056647777557373, "logps/rejected": -288.2052917480469, "loss": 0.0855, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28466901183128357, "rewards/margins": 2.8138210773468018, "rewards/rejected": -2.5291523933410645, "step": 16430 }, { "epoch": 0.66, "learning_rate": 1.58293504652809e-06, "logits/chosen": -3.013485908508301, "logits/rejected": -3.042703151702881, "logps/chosen": -3.8787460327148438, "logps/rejected": -286.62677001953125, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27779850363731384, "rewards/margins": 2.791189670562744, "rewards/rejected": -2.5133910179138184, "step": 16440 }, { "epoch": 0.66, "learning_rate": 1.5796886182883053e-06, "logits/chosen": -3.000614643096924, "logits/rejected": -3.032233476638794, "logps/chosen": -0.18853922188282013, "logps/rejected": -292.9788513183594, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.3146760165691376, "rewards/margins": 2.888321876525879, "rewards/rejected": -2.573646068572998, "step": 16450 }, { "epoch": 0.66, "learning_rate": 1.5764439842426516e-06, "logits/chosen": -3.0237982273101807, "logits/rejected": -3.0522892475128174, "logps/chosen": -2.800323963165283, "logps/rejected": -288.0635986328125, "loss": 0.0809, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2880081832408905, "rewards/margins": 2.8166537284851074, "rewards/rejected": -2.5286452770233154, "step": 16460 }, { "epoch": 0.66, "learning_rate": 1.5732011507167082e-06, "logits/chosen": -3.0118167400360107, "logits/rejected": -3.0418801307678223, "logps/chosen": -0.8645929098129272, "logps/rejected": -288.01434326171875, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": 0.30627018213272095, "rewards/margins": 2.8355166912078857, "rewards/rejected": -2.5292460918426514, "step": 16470 }, { "epoch": 0.66, "learning_rate": 1.5699601240325474e-06, "logits/chosen": -3.035339832305908, "logits/rejected": -3.0638556480407715, "logps/chosen": -0.17861448228359222, "logps/rejected": -290.24847412109375, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.31407788395881653, "rewards/margins": 2.860225200653076, "rewards/rejected": -2.546147346496582, "step": 16480 }, { "epoch": 0.66, "learning_rate": 1.5667209105087134e-06, "logits/chosen": -3.0132129192352295, "logits/rejected": -3.041710138320923, "logps/chosen": -5.8827409744262695, "logps/rejected": -283.49884033203125, "loss": 0.1119, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25877439975738525, "rewards/margins": 2.73589825630188, "rewards/rejected": -2.477123737335205, "step": 16490 }, { "epoch": 0.66, "learning_rate": 1.56348351646022e-06, "logits/chosen": -3.013010263442993, "logits/rejected": -3.042860984802246, "logps/chosen": -0.1771104484796524, "logps/rejected": -291.5143127441406, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.316184937953949, "rewards/margins": 2.875755548477173, "rewards/rejected": -2.559570550918579, "step": 16500 }, { "epoch": 0.66, "eval_logits/chosen": -3.070254325866699, "eval_logits/rejected": -3.0965654850006104, "eval_logps/chosen": -0.15485908091068268, "eval_logps/rejected": -284.1294250488281, "eval_loss": 0.060529422014951706, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31505095958709717, "eval_rewards/margins": 2.796189308166504, "eval_rewards/rejected": -2.481138229370117, "eval_runtime": 2.5375, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 16500 }, { "epoch": 0.66, "learning_rate": 1.5602479481985333e-06, "logits/chosen": -3.0205371379852295, "logits/rejected": -3.052062749862671, "logps/chosen": -0.20174220204353333, "logps/rejected": -290.1114807128906, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.31502217054367065, "rewards/margins": 2.860435962677002, "rewards/rejected": -2.5454142093658447, "step": 16510 }, { "epoch": 0.66, "learning_rate": 1.557014212031559e-06, "logits/chosen": -3.0156190395355225, "logits/rejected": -3.0431604385375977, "logps/chosen": -7.456724643707275, "logps/rejected": -284.587646484375, "loss": 0.1274, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23989292979240417, "rewards/margins": 2.733884334564209, "rewards/rejected": -2.4939916133880615, "step": 16520 }, { "epoch": 0.66, "learning_rate": 1.5537823142636304e-06, "logits/chosen": -3.031494617462158, "logits/rejected": -3.0590624809265137, "logps/chosen": -2.9149062633514404, "logps/rejected": -287.62213134765625, "loss": 0.0819, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2891761362552643, "rewards/margins": 2.811718225479126, "rewards/rejected": -2.5225417613983154, "step": 16530 }, { "epoch": 0.66, "learning_rate": 1.5505522611954977e-06, "logits/chosen": -3.0008339881896973, "logits/rejected": -3.028841495513916, "logps/chosen": -7.360565185546875, "logps/rejected": -282.1570739746094, "loss": 0.1284, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24273553490638733, "rewards/margins": 2.7104787826538086, "rewards/rejected": -2.467743396759033, "step": 16540 }, { "epoch": 0.66, "learning_rate": 1.547324059124315e-06, "logits/chosen": -3.041374683380127, "logits/rejected": -3.06969952583313, "logps/chosen": -3.0397844314575195, "logps/rejected": -288.50897216796875, "loss": 0.084, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28691166639328003, "rewards/margins": 2.819826126098633, "rewards/rejected": -2.532914638519287, "step": 16550 }, { "epoch": 0.66, "learning_rate": 1.544097714343627e-06, "logits/chosen": -3.0313830375671387, "logits/rejected": -3.0614235401153564, "logps/chosen": -3.0898849964141846, "logps/rejected": -287.97161865234375, "loss": 0.0829, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28461623191833496, "rewards/margins": 2.813955307006836, "rewards/rejected": -2.529339075088501, "step": 16560 }, { "epoch": 0.66, "learning_rate": 1.5408732331433596e-06, "logits/chosen": -3.0109353065490723, "logits/rejected": -3.0404722690582275, "logps/chosen": -3.4610373973846436, "logps/rejected": -286.3028869628906, "loss": 0.0874, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28258591890335083, "rewards/margins": 2.7924587726593018, "rewards/rejected": -2.5098729133605957, "step": 16570 }, { "epoch": 0.66, "learning_rate": 1.5376506218098017e-06, "logits/chosen": -3.0260519981384277, "logits/rejected": -3.0551974773406982, "logps/chosen": -0.8293204307556152, "logps/rejected": -287.12054443359375, "loss": 0.0647, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3095115125179291, "rewards/margins": 2.8251452445983887, "rewards/rejected": -2.5156333446502686, "step": 16580 }, { "epoch": 0.66, "learning_rate": 1.5344298866256002e-06, "logits/chosen": -3.003955125808716, "logits/rejected": -3.033331871032715, "logps/chosen": -8.383515357971191, "logps/rejected": -280.04071044921875, "loss": 0.1392, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23085716366767883, "rewards/margins": 2.6815500259399414, "rewards/rejected": -2.450692892074585, "step": 16590 }, { "epoch": 0.66, "learning_rate": 1.5312110338697427e-06, "logits/chosen": -3.0206246376037598, "logits/rejected": -3.0513174533843994, "logps/chosen": -3.758796215057373, "logps/rejected": -286.3612365722656, "loss": 0.0921, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2803575396537781, "rewards/margins": 2.7887232303619385, "rewards/rejected": -2.5083658695220947, "step": 16600 }, { "epoch": 0.66, "eval_logits/chosen": -3.070540428161621, "eval_logits/rejected": -3.096243143081665, "eval_logps/chosen": -0.1595509946346283, "eval_logps/rejected": -283.9339599609375, "eval_loss": 0.06063425540924072, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31500402092933655, "eval_rewards/margins": 2.7941880226135254, "eval_rewards/rejected": -2.4791836738586426, "eval_runtime": 2.5396, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 16600 }, { "epoch": 0.66, "learning_rate": 1.5279940698175484e-06, "logits/chosen": -3.0260541439056396, "logits/rejected": -3.0543665885925293, "logps/chosen": -6.720359802246094, "logps/rejected": -282.33624267578125, "loss": 0.1219, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24920251965522766, "rewards/margins": 2.7193870544433594, "rewards/rejected": -2.470184087753296, "step": 16610 }, { "epoch": 0.66, "learning_rate": 1.524779000740651e-06, "logits/chosen": -3.020236015319824, "logits/rejected": -3.0508151054382324, "logps/chosen": -4.9243083000183105, "logps/rejected": -284.9336853027344, "loss": 0.1022, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26838722825050354, "rewards/margins": 2.7622528076171875, "rewards/rejected": -2.493865489959717, "step": 16620 }, { "epoch": 0.67, "learning_rate": 1.521565832906994e-06, "logits/chosen": -3.0069985389709473, "logits/rejected": -3.039262056350708, "logps/chosen": -0.1815185546875, "logps/rejected": -290.11688232421875, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.31536582112312317, "rewards/margins": 2.861616611480713, "rewards/rejected": -2.546250581741333, "step": 16630 }, { "epoch": 0.67, "learning_rate": 1.5183545725808127e-06, "logits/chosen": -3.027838945388794, "logits/rejected": -3.058008909225464, "logps/chosen": -3.3343169689178467, "logps/rejected": -286.83221435546875, "loss": 0.0868, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2830333709716797, "rewards/margins": 2.7982068061828613, "rewards/rejected": -2.5151734352111816, "step": 16640 }, { "epoch": 0.67, "learning_rate": 1.5151452260226224e-06, "logits/chosen": -3.0127599239349365, "logits/rejected": -3.043036937713623, "logps/chosen": -0.19326269626617432, "logps/rejected": -291.4412536621094, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31564420461654663, "rewards/margins": 2.8712658882141113, "rewards/rejected": -2.55562162399292, "step": 16650 }, { "epoch": 0.67, "learning_rate": 1.5119377994892095e-06, "logits/chosen": -3.0412230491638184, "logits/rejected": -3.0682053565979004, "logps/chosen": -3.966150999069214, "logps/rejected": -283.5875549316406, "loss": 0.0955, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27856117486953735, "rewards/margins": 2.7617907524108887, "rewards/rejected": -2.483229875564575, "step": 16660 }, { "epoch": 0.67, "learning_rate": 1.5087322992336149e-06, "logits/chosen": -2.97214937210083, "logits/rejected": -3.005687713623047, "logps/chosen": -2.8129818439483643, "logps/rejected": -285.9007873535156, "loss": 0.0824, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28725165128707886, "rewards/margins": 2.79573130607605, "rewards/rejected": -2.508479356765747, "step": 16670 }, { "epoch": 0.67, "learning_rate": 1.505528731505126e-06, "logits/chosen": -3.0148367881774902, "logits/rejected": -3.045407772064209, "logps/chosen": -4.257229804992676, "logps/rejected": -284.0137023925781, "loss": 0.0977, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27552860975265503, "rewards/margins": 2.7583611011505127, "rewards/rejected": -2.482832431793213, "step": 16680 }, { "epoch": 0.67, "learning_rate": 1.502327102549262e-06, "logits/chosen": -3.0186355113983154, "logits/rejected": -3.0468027591705322, "logps/chosen": -8.107789993286133, "logps/rejected": -281.7931823730469, "loss": 0.1197, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23705962300300598, "rewards/margins": 2.7015938758850098, "rewards/rejected": -2.464534282684326, "step": 16690 }, { "epoch": 0.67, "learning_rate": 1.4991274186077632e-06, "logits/chosen": -3.069828510284424, "logits/rejected": -3.097304105758667, "logps/chosen": -5.019820213317871, "logps/rejected": -284.939697265625, "loss": 0.1013, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26807695627212524, "rewards/margins": 2.7587544918060303, "rewards/rejected": -2.49067759513855, "step": 16700 }, { "epoch": 0.67, "eval_logits/chosen": -3.0692667961120605, "eval_logits/rejected": -3.095215082168579, "eval_logps/chosen": -0.16406603157520294, "eval_logps/rejected": -284.13848876953125, "eval_loss": 0.060545384883880615, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149588704109192, "eval_rewards/margins": 2.7961878776550293, "eval_rewards/rejected": -2.4812285900115967, "eval_runtime": 2.541, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 16700 }, { "epoch": 0.67, "learning_rate": 1.4959296859185754e-06, "logits/chosen": -3.0268890857696533, "logits/rejected": -3.05617356300354, "logps/chosen": -7.876406192779541, "logps/rejected": -280.3373718261719, "loss": 0.1373, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2343808114528656, "rewards/margins": 2.688239812850952, "rewards/rejected": -2.4538588523864746, "step": 16710 }, { "epoch": 0.67, "learning_rate": 1.4927339107158437e-06, "logits/chosen": -3.01871395111084, "logits/rejected": -3.0472865104675293, "logps/chosen": -0.16690579056739807, "logps/rejected": -290.98504638671875, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3145678639411926, "rewards/margins": 2.871798515319824, "rewards/rejected": -2.5572304725646973, "step": 16720 }, { "epoch": 0.67, "learning_rate": 1.4895400992298942e-06, "logits/chosen": -3.0140795707702637, "logits/rejected": -3.045462131500244, "logps/chosen": -3.079259157180786, "logps/rejected": -285.8410949707031, "loss": 0.0856, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2865827977657318, "rewards/margins": 2.788691997528076, "rewards/rejected": -2.5021090507507324, "step": 16730 }, { "epoch": 0.67, "learning_rate": 1.4863482576872276e-06, "logits/chosen": -3.022059679031372, "logits/rejected": -3.050328016281128, "logps/chosen": -8.150382041931152, "logps/rejected": -279.01190185546875, "loss": 0.1367, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2357555329799652, "rewards/margins": 2.6716115474700928, "rewards/rejected": -2.4358558654785156, "step": 16740 }, { "epoch": 0.67, "learning_rate": 1.4831583923105e-06, "logits/chosen": -2.9932682514190674, "logits/rejected": -3.0217957496643066, "logps/chosen": -5.604483127593994, "logps/rejected": -281.2306823730469, "loss": 0.1112, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2591361105442047, "rewards/margins": 2.7187705039978027, "rewards/rejected": -2.459634304046631, "step": 16750 }, { "epoch": 0.67, "learning_rate": 1.4799705093185181e-06, "logits/chosen": -3.0190694332122803, "logits/rejected": -3.0459840297698975, "logps/chosen": -6.116730690002441, "logps/rejected": -284.88262939453125, "loss": 0.1144, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25558626651763916, "rewards/margins": 2.7555253505706787, "rewards/rejected": -2.49993896484375, "step": 16760 }, { "epoch": 0.67, "learning_rate": 1.4767846149262238e-06, "logits/chosen": -3.001539468765259, "logits/rejected": -3.0325381755828857, "logps/chosen": -3.840409755706787, "logps/rejected": -286.7460632324219, "loss": 0.0922, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2791980504989624, "rewards/margins": 2.7930822372436523, "rewards/rejected": -2.5138843059539795, "step": 16770 }, { "epoch": 0.67, "learning_rate": 1.4736007153446803e-06, "logits/chosen": -3.013788938522339, "logits/rejected": -3.0442802906036377, "logps/chosen": -0.5874699354171753, "logps/rejected": -288.7613830566406, "loss": 0.0639, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3103863000869751, "rewards/margins": 2.8487448692321777, "rewards/rejected": -2.538358688354492, "step": 16780 }, { "epoch": 0.67, "learning_rate": 1.4704188167810635e-06, "logits/chosen": -3.0273234844207764, "logits/rejected": -3.0571441650390625, "logps/chosen": -3.1494197845458984, "logps/rejected": -286.99725341796875, "loss": 0.085, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2822128236293793, "rewards/margins": 2.8044848442077637, "rewards/rejected": -2.5222718715667725, "step": 16790 }, { "epoch": 0.67, "learning_rate": 1.467238925438646e-06, "logits/chosen": -3.047241687774658, "logits/rejected": -3.0753626823425293, "logps/chosen": -0.4152262806892395, "logps/rejected": -288.11614990234375, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": 0.31344449520111084, "rewards/margins": 2.840935230255127, "rewards/rejected": -2.5274906158447266, "step": 16800 }, { "epoch": 0.67, "eval_logits/chosen": -3.0703492164611816, "eval_logits/rejected": -3.0956597328186035, "eval_logps/chosen": -0.13849003612995148, "eval_logps/rejected": -283.9129333496094, "eval_loss": 0.060699693858623505, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3152146339416504, "eval_rewards/margins": 2.7941880226135254, "eval_rewards/rejected": -2.478973388671875, "eval_runtime": 2.5364, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 16800 }, { "epoch": 0.67, "learning_rate": 1.46406104751679e-06, "logits/chosen": -3.047081470489502, "logits/rejected": -3.076572895050049, "logps/chosen": -3.0683417320251465, "logps/rejected": -287.97589111328125, "loss": 0.0837, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2849496901035309, "rewards/margins": 2.814178943634033, "rewards/rejected": -2.529228925704956, "step": 16810 }, { "epoch": 0.67, "learning_rate": 1.4608851892109305e-06, "logits/chosen": -3.023054838180542, "logits/rejected": -3.052030086517334, "logps/chosen": -3.5051162242889404, "logps/rejected": -286.3328857421875, "loss": 0.0878, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2797238826751709, "rewards/margins": 2.796128988265991, "rewards/rejected": -2.5164051055908203, "step": 16820 }, { "epoch": 0.67, "learning_rate": 1.457711356712567e-06, "logits/chosen": -3.024583339691162, "logits/rejected": -3.0527548789978027, "logps/chosen": -0.6150221824645996, "logps/rejected": -285.48040771484375, "loss": 0.0646, "rewards/accuracies": 1.0, "rewards/chosen": 0.3111326992511749, "rewards/margins": 2.8102290630340576, "rewards/rejected": -2.499096393585205, "step": 16830 }, { "epoch": 0.67, "learning_rate": 1.4545395562092467e-06, "logits/chosen": -3.050981283187866, "logits/rejected": -3.0787570476531982, "logps/chosen": -6.653090000152588, "logps/rejected": -282.40509033203125, "loss": 0.1209, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2499408721923828, "rewards/margins": 2.7165191173553467, "rewards/rejected": -2.466578483581543, "step": 16840 }, { "epoch": 0.67, "learning_rate": 1.4513697938845571e-06, "logits/chosen": -3.0249247550964355, "logits/rejected": -3.0539870262145996, "logps/chosen": -0.18720033764839172, "logps/rejected": -292.1741638183594, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.31617483496665955, "rewards/margins": 2.8804633617401123, "rewards/rejected": -2.56428861618042, "step": 16850 }, { "epoch": 0.67, "learning_rate": 1.4482020759181136e-06, "logits/chosen": -3.024162769317627, "logits/rejected": -3.051290988922119, "logps/chosen": -7.052886962890625, "logps/rejected": -283.4461364746094, "loss": 0.1245, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24677443504333496, "rewards/margins": 2.7274701595306396, "rewards/rejected": -2.480695962905884, "step": 16860 }, { "epoch": 0.67, "learning_rate": 1.4450364084855433e-06, "logits/chosen": -3.0273356437683105, "logits/rejected": -3.0545687675476074, "logps/chosen": -4.845399379730225, "logps/rejected": -287.1412353515625, "loss": 0.0946, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2680504322052002, "rewards/margins": 2.782707929611206, "rewards/rejected": -2.514657497406006, "step": 16870 }, { "epoch": 0.68, "learning_rate": 1.4418727977584774e-06, "logits/chosen": -3.0145533084869385, "logits/rejected": -3.0442070960998535, "logps/chosen": -0.2408263236284256, "logps/rejected": -290.04974365234375, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.31567373871803284, "rewards/margins": 2.8614847660064697, "rewards/rejected": -2.545811414718628, "step": 16880 }, { "epoch": 0.68, "learning_rate": 1.438711249904536e-06, "logits/chosen": -3.027634859085083, "logits/rejected": -3.056579113006592, "logps/chosen": -3.8503613471984863, "logps/rejected": -287.93267822265625, "loss": 0.0919, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27800074219703674, "rewards/margins": 2.799656391143799, "rewards/rejected": -2.521655797958374, "step": 16890 }, { "epoch": 0.68, "learning_rate": 1.4355517710873184e-06, "logits/chosen": -3.0160160064697266, "logits/rejected": -3.046125888824463, "logps/chosen": -0.18088161945343018, "logps/rejected": -289.6818542480469, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31637340784072876, "rewards/margins": 2.8578312397003174, "rewards/rejected": -2.5414576530456543, "step": 16900 }, { "epoch": 0.68, "eval_logits/chosen": -3.0707173347473145, "eval_logits/rejected": -3.097022533416748, "eval_logps/chosen": -0.15288792550563812, "eval_logps/rejected": -284.1773376464844, "eval_loss": 0.06047062948346138, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31507065892219543, "eval_rewards/margins": 2.7966883182525635, "eval_rewards/rejected": -2.4816176891326904, "eval_runtime": 2.5361, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 16900 }, { "epoch": 0.68, "learning_rate": 1.4323943674663914e-06, "logits/chosen": -3.030766487121582, "logits/rejected": -3.0611774921417236, "logps/chosen": -0.5097833275794983, "logps/rejected": -286.9996337890625, "loss": 0.0642, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3119841516017914, "rewards/margins": 2.8281400203704834, "rewards/rejected": -2.51615571975708, "step": 16910 }, { "epoch": 0.68, "learning_rate": 1.4292390451972745e-06, "logits/chosen": -3.019165515899658, "logits/rejected": -3.048450469970703, "logps/chosen": -0.22943496704101562, "logps/rejected": -290.3204040527344, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3131713271141052, "rewards/margins": 2.862921953201294, "rewards/rejected": -2.549750566482544, "step": 16920 }, { "epoch": 0.68, "learning_rate": 1.4260858104314299e-06, "logits/chosen": -3.028331756591797, "logits/rejected": -3.0585293769836426, "logps/chosen": -0.1746743619441986, "logps/rejected": -290.03973388671875, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3150883615016937, "rewards/margins": 2.8623249530792236, "rewards/rejected": -2.547236680984497, "step": 16930 }, { "epoch": 0.68, "learning_rate": 1.42293466931625e-06, "logits/chosen": -3.0084424018859863, "logits/rejected": -3.0406250953674316, "logps/chosen": -0.28060537576675415, "logps/rejected": -289.93939208984375, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.31523746252059937, "rewards/margins": 2.855865001678467, "rewards/rejected": -2.5406272411346436, "step": 16940 }, { "epoch": 0.68, "learning_rate": 1.419785627995044e-06, "logits/chosen": -3.0307538509368896, "logits/rejected": -3.060084819793701, "logps/chosen": -6.926133632659912, "logps/rejected": -282.2654724121094, "loss": 0.1228, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2501484751701355, "rewards/margins": 2.7191836833953857, "rewards/rejected": -2.4690353870391846, "step": 16950 }, { "epoch": 0.68, "learning_rate": 1.4166386926070322e-06, "logits/chosen": -3.0181775093078613, "logits/rejected": -3.050724744796753, "logps/chosen": -0.16632576286792755, "logps/rejected": -292.2181396484375, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.31514012813568115, "rewards/margins": 2.8815572261810303, "rewards/rejected": -2.5664172172546387, "step": 16960 }, { "epoch": 0.68, "learning_rate": 1.4134938692873246e-06, "logits/chosen": -3.011220932006836, "logits/rejected": -3.0421934127807617, "logps/chosen": -4.4767937660217285, "logps/rejected": -284.4206848144531, "loss": 0.0977, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2727833390235901, "rewards/margins": 2.762021780014038, "rewards/rejected": -2.4892385005950928, "step": 16970 }, { "epoch": 0.68, "learning_rate": 1.4103511641669152e-06, "logits/chosen": -3.0303845405578613, "logits/rejected": -3.0593137741088867, "logps/chosen": -0.5192890167236328, "logps/rejected": -289.9402160644531, "loss": 0.0574, "rewards/accuracies": 1.0, "rewards/chosen": 0.31304579973220825, "rewards/margins": 2.856234550476074, "rewards/rejected": -2.5431885719299316, "step": 16980 }, { "epoch": 0.68, "learning_rate": 1.4072105833726685e-06, "logits/chosen": -3.019526481628418, "logits/rejected": -3.0488784313201904, "logps/chosen": -0.18212607502937317, "logps/rejected": -288.74566650390625, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.31593939661979675, "rewards/margins": 2.8463869094848633, "rewards/rejected": -2.530447483062744, "step": 16990 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -2.9925029277801514, "logits/rejected": -3.0227952003479004, "logps/chosen": -3.9100990295410156, "logps/rejected": -286.6326599121094, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27662867307662964, "rewards/margins": 2.793485641479492, "rewards/rejected": -2.516857147216797, "step": 17000 }, { "epoch": 0.68, "eval_logits/chosen": -3.070481300354004, "eval_logits/rejected": -3.0960233211517334, "eval_logps/chosen": -0.1672990769147873, "eval_logps/rejected": -284.39178466796875, "eval_loss": 0.06031963229179382, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31492653489112854, "eval_rewards/margins": 2.7986886501312256, "eval_rewards/rejected": -2.483762264251709, "eval_runtime": 2.5385, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 17000 }, { "epoch": 0.68, "learning_rate": 1.4009358192494017e-06, "logits/chosen": -3.012655258178711, "logits/rejected": -3.042903184890747, "logps/chosen": -6.168453693389893, "logps/rejected": -282.3889465332031, "loss": 0.1154, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2582475244998932, "rewards/margins": 2.727105140686035, "rewards/rejected": -2.4688572883605957, "step": 17010 }, { "epoch": 0.68, "learning_rate": 1.397801648153354e-06, "logits/chosen": -3.0330488681793213, "logits/rejected": -3.0616049766540527, "logps/chosen": -3.8499419689178467, "logps/rejected": -289.64483642578125, "loss": 0.0906, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2781457304954529, "rewards/margins": 2.8185088634490967, "rewards/rejected": -2.540363073348999, "step": 17020 }, { "epoch": 0.68, "learning_rate": 1.3946696258493936e-06, "logits/chosen": -3.0374042987823486, "logits/rejected": -3.0658109188079834, "logps/chosen": -0.1898970603942871, "logps/rejected": -292.059814453125, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3162730038166046, "rewards/margins": 2.878474712371826, "rewards/rejected": -2.562201976776123, "step": 17030 }, { "epoch": 0.68, "learning_rate": 1.3915397584435564e-06, "logits/chosen": -3.0268242359161377, "logits/rejected": -3.057021379470825, "logps/chosen": -0.3591463565826416, "logps/rejected": -289.1417541503906, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.3167189061641693, "rewards/margins": 2.8526179790496826, "rewards/rejected": -2.5358994007110596, "step": 17040 }, { "epoch": 0.68, "learning_rate": 1.388412052037682e-06, "logits/chosen": -3.031808376312256, "logits/rejected": -3.059906482696533, "logps/chosen": -5.55517578125, "logps/rejected": -283.9244384765625, "loss": 0.1086, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2629479765892029, "rewards/margins": 2.746854066848755, "rewards/rejected": -2.4839060306549072, "step": 17050 }, { "epoch": 0.68, "learning_rate": 1.3852865127293901e-06, "logits/chosen": -3.0185909271240234, "logits/rejected": -3.0476467609405518, "logps/chosen": -1.7035763263702393, "logps/rejected": -287.48370361328125, "loss": 0.0612, "rewards/accuracies": 1.0, "rewards/chosen": 0.29842668771743774, "rewards/margins": 2.821362257003784, "rewards/rejected": -2.522935628890991, "step": 17060 }, { "epoch": 0.68, "learning_rate": 1.3821631466120821e-06, "logits/chosen": -3.0278687477111816, "logits/rejected": -3.0566000938415527, "logps/chosen": -0.1650432050228119, "logps/rejected": -292.7552795410156, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": 0.31551456451416016, "rewards/margins": 2.8863494396209717, "rewards/rejected": -2.5708351135253906, "step": 17070 }, { "epoch": 0.68, "learning_rate": 1.3790419597749198e-06, "logits/chosen": -3.012402057647705, "logits/rejected": -3.041232109069824, "logps/chosen": -0.15758544206619263, "logps/rejected": -291.9353332519531, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.31664884090423584, "rewards/margins": 2.8770241737365723, "rewards/rejected": -2.560375690460205, "step": 17080 }, { "epoch": 0.68, "learning_rate": 1.375922958302815e-06, "logits/chosen": -3.0117835998535156, "logits/rejected": -3.038914918899536, "logps/chosen": -5.974498271942139, "logps/rejected": -286.35418701171875, "loss": 0.1115, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25771743059158325, "rewards/margins": 2.7677454948425293, "rewards/rejected": -2.51002836227417, "step": 17090 }, { "epoch": 0.68, "learning_rate": 1.3728061482764238e-06, "logits/chosen": -3.0121607780456543, "logits/rejected": -3.0424602031707764, "logps/chosen": -0.4433276653289795, "logps/rejected": -291.4475402832031, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3106141984462738, "rewards/margins": 2.8750555515289307, "rewards/rejected": -2.564441204071045, "step": 17100 }, { "epoch": 0.68, "eval_logits/chosen": -3.0696871280670166, "eval_logits/rejected": -3.095147132873535, "eval_logps/chosen": -0.15241165459156036, "eval_logps/rejected": -284.1518859863281, "eval_loss": 0.06055723503232002, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31507542729377747, "eval_rewards/margins": 2.796438694000244, "eval_rewards/rejected": -2.481362819671631, "eval_runtime": 2.5349, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 17100 }, { "epoch": 0.68, "learning_rate": 1.369691535772123e-06, "logits/chosen": -3.0133984088897705, "logits/rejected": -3.0433387756347656, "logps/chosen": -8.234275817871094, "logps/rejected": -280.2055969238281, "loss": 0.1355, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23505838215351105, "rewards/margins": 2.6852030754089355, "rewards/rejected": -2.4501452445983887, "step": 17110 }, { "epoch": 0.68, "learning_rate": 1.3665791268620121e-06, "logits/chosen": -3.019498586654663, "logits/rejected": -3.050245523452759, "logps/chosen": -3.013449192047119, "logps/rejected": -285.9660339355469, "loss": 0.0823, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2843361794948578, "rewards/margins": 2.795501232147217, "rewards/rejected": -2.511164903640747, "step": 17120 }, { "epoch": 0.69, "learning_rate": 1.3634689276138905e-06, "logits/chosen": -3.0235469341278076, "logits/rejected": -3.048687219619751, "logps/chosen": -9.511899948120117, "logps/rejected": -275.7026062011719, "loss": 0.1525, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2222900390625, "rewards/margins": 2.625469207763672, "rewards/rejected": -2.4031786918640137, "step": 17130 }, { "epoch": 0.69, "learning_rate": 1.3603609440912508e-06, "logits/chosen": -3.024536609649658, "logits/rejected": -3.053208827972412, "logps/chosen": -5.179483413696289, "logps/rejected": -284.5981750488281, "loss": 0.1075, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.264211505651474, "rewards/margins": 2.7541160583496094, "rewards/rejected": -2.4899048805236816, "step": 17140 }, { "epoch": 0.69, "learning_rate": 1.3572551823532654e-06, "logits/chosen": -3.021286725997925, "logits/rejected": -3.049680471420288, "logps/chosen": -2.2465240955352783, "logps/rejected": -289.0710754394531, "loss": 0.0652, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2949848473072052, "rewards/margins": 2.8305702209472656, "rewards/rejected": -2.535585403442383, "step": 17150 }, { "epoch": 0.69, "learning_rate": 1.3541516484547754e-06, "logits/chosen": -3.0037312507629395, "logits/rejected": -3.0344414710998535, "logps/chosen": -3.1827385425567627, "logps/rejected": -288.733154296875, "loss": 0.085, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28538328409194946, "rewards/margins": 2.8171916007995605, "rewards/rejected": -2.531808614730835, "step": 17160 }, { "epoch": 0.69, "learning_rate": 1.3510503484462807e-06, "logits/chosen": -3.0079474449157715, "logits/rejected": -3.0386228561401367, "logps/chosen": -2.5416152477264404, "logps/rejected": -288.5398864746094, "loss": 0.0791, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2898055911064148, "rewards/margins": 2.8248512744903564, "rewards/rejected": -2.5350453853607178, "step": 17170 }, { "epoch": 0.69, "learning_rate": 1.3479512883739233e-06, "logits/chosen": -3.0283541679382324, "logits/rejected": -3.0566511154174805, "logps/chosen": -3.5273098945617676, "logps/rejected": -288.47216796875, "loss": 0.0886, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2822117507457733, "rewards/margins": 2.8105292320251465, "rewards/rejected": -2.5283172130584717, "step": 17180 }, { "epoch": 0.69, "learning_rate": 1.3448544742794792e-06, "logits/chosen": -3.0080642700195312, "logits/rejected": -3.0368897914886475, "logps/chosen": -3.353010892868042, "logps/rejected": -283.7425842285156, "loss": 0.0877, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2856161296367645, "rewards/margins": 2.76631498336792, "rewards/rejected": -2.480699062347412, "step": 17190 }, { "epoch": 0.69, "learning_rate": 1.3417599122003464e-06, "logits/chosen": -3.008664608001709, "logits/rejected": -3.039154529571533, "logps/chosen": -3.8804562091827393, "logps/rejected": -285.05328369140625, "loss": 0.0923, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2768271565437317, "rewards/margins": 2.770395278930664, "rewards/rejected": -2.493567943572998, "step": 17200 }, { "epoch": 0.69, "eval_logits/chosen": -3.0702590942382812, "eval_logits/rejected": -3.0957581996917725, "eval_logps/chosen": -0.16426649689674377, "eval_logps/rejected": -284.23870849609375, "eval_loss": 0.06046100705862045, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149568736553192, "eval_rewards/margins": 2.7971882820129395, "eval_rewards/rejected": -2.482231616973877, "eval_runtime": 2.5385, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 17200 }, { "epoch": 0.69, "learning_rate": 1.338667608169531e-06, "logits/chosen": -3.022888660430908, "logits/rejected": -3.0510058403015137, "logps/chosen": -5.092907905578613, "logps/rejected": -283.2092590332031, "loss": 0.106, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2646971344947815, "rewards/margins": 2.743873357772827, "rewards/rejected": -2.4791760444641113, "step": 17210 }, { "epoch": 0.69, "learning_rate": 1.3355775682156395e-06, "logits/chosen": -3.0036630630493164, "logits/rejected": -3.031294345855713, "logps/chosen": -6.687766075134277, "logps/rejected": -285.7857666015625, "loss": 0.12, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25161224603652954, "rewards/margins": 2.7558913230895996, "rewards/rejected": -2.5042786598205566, "step": 17220 }, { "epoch": 0.69, "learning_rate": 1.3324897983628621e-06, "logits/chosen": -3.0175318717956543, "logits/rejected": -3.048720359802246, "logps/chosen": -0.45630016922950745, "logps/rejected": -288.1964416503906, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": 0.3134072721004486, "rewards/margins": 2.8416504859924316, "rewards/rejected": -2.528243064880371, "step": 17230 }, { "epoch": 0.69, "learning_rate": 1.329404304630964e-06, "logits/chosen": -3.014279842376709, "logits/rejected": -3.0453479290008545, "logps/chosen": -0.2629079520702362, "logps/rejected": -288.2630615234375, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": 0.31452813744544983, "rewards/margins": 2.8411221504211426, "rewards/rejected": -2.5265936851501465, "step": 17240 }, { "epoch": 0.69, "learning_rate": 1.3263210930352737e-06, "logits/chosen": -3.0070977210998535, "logits/rejected": -3.0383176803588867, "logps/chosen": -0.1674434393644333, "logps/rejected": -290.37469482421875, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.31593233346939087, "rewards/margins": 2.865070343017578, "rewards/rejected": -2.549138069152832, "step": 17250 }, { "epoch": 0.69, "learning_rate": 1.3232401695866686e-06, "logits/chosen": -2.999643325805664, "logits/rejected": -3.033475399017334, "logps/chosen": -0.16357269883155823, "logps/rejected": -291.4584045410156, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.31413644552230835, "rewards/margins": 2.87756085395813, "rewards/rejected": -2.563424587249756, "step": 17260 }, { "epoch": 0.69, "learning_rate": 1.3201615402915686e-06, "logits/chosen": -2.9951648712158203, "logits/rejected": -3.0291099548339844, "logps/chosen": -0.2339058816432953, "logps/rejected": -291.98187255859375, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.3131881654262543, "rewards/margins": 2.8798294067382812, "rewards/rejected": -2.566641330718994, "step": 17270 }, { "epoch": 0.69, "learning_rate": 1.3170852111519176e-06, "logits/chosen": -3.011437177658081, "logits/rejected": -3.0433449745178223, "logps/chosen": -0.16996555030345917, "logps/rejected": -289.64129638671875, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.31285303831100464, "rewards/margins": 2.8583927154541016, "rewards/rejected": -2.545539379119873, "step": 17280 }, { "epoch": 0.69, "learning_rate": 1.3140111881651773e-06, "logits/chosen": -3.0152242183685303, "logits/rejected": -3.0451791286468506, "logps/chosen": -4.013184070587158, "logps/rejected": -282.5010681152344, "loss": 0.0929, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2746261656284332, "rewards/margins": 2.749244213104248, "rewards/rejected": -2.4746179580688477, "step": 17290 }, { "epoch": 0.69, "learning_rate": 1.3109394773243117e-06, "logits/chosen": -3.028050184249878, "logits/rejected": -3.0572242736816406, "logps/chosen": -2.963945150375366, "logps/rejected": -286.84197998046875, "loss": 0.0832, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2862498462200165, "rewards/margins": 2.803539514541626, "rewards/rejected": -2.517289638519287, "step": 17300 }, { "epoch": 0.69, "eval_logits/chosen": -3.0706727504730225, "eval_logits/rejected": -3.0961520671844482, "eval_logps/chosen": -0.16062907874584198, "eval_logps/rejected": -284.610107421875, "eval_loss": 0.06019454076886177, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149932622909546, "eval_rewards/margins": 2.800938367843628, "eval_rewards/rejected": -2.485945224761963, "eval_runtime": 2.5403, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 17300 }, { "epoch": 0.69, "learning_rate": 1.30787008461778e-06, "logits/chosen": -3.0061581134796143, "logits/rejected": -3.036393404006958, "logps/chosen": -3.761981964111328, "logps/rejected": -287.1051025390625, "loss": 0.0915, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27753305435180664, "rewards/margins": 2.7957050800323486, "rewards/rejected": -2.518172264099121, "step": 17310 }, { "epoch": 0.69, "learning_rate": 1.3048030160295196e-06, "logits/chosen": -3.011699914932251, "logits/rejected": -3.044250965118408, "logps/chosen": -0.1810985803604126, "logps/rejected": -289.32122802734375, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.3161593973636627, "rewards/margins": 2.8504719734191895, "rewards/rejected": -2.5343127250671387, "step": 17320 }, { "epoch": 0.69, "learning_rate": 1.3017382775389376e-06, "logits/chosen": -2.995487689971924, "logits/rejected": -3.029189348220825, "logps/chosen": -0.6961869597434998, "logps/rejected": -291.6742858886719, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3085206151008606, "rewards/margins": 2.8737034797668457, "rewards/rejected": -2.56518292427063, "step": 17330 }, { "epoch": 0.69, "learning_rate": 1.2986758751208983e-06, "logits/chosen": -2.9995505809783936, "logits/rejected": -3.0290257930755615, "logps/chosen": -0.44228529930114746, "logps/rejected": -287.6860046386719, "loss": 0.064, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3148420453071594, "rewards/margins": 2.830239772796631, "rewards/rejected": -2.515397787094116, "step": 17340 }, { "epoch": 0.69, "learning_rate": 1.2956158147457116e-06, "logits/chosen": -3.0231685638427734, "logits/rejected": -3.0537219047546387, "logps/chosen": -0.1967194378376007, "logps/rejected": -292.9955139160156, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148500323295593, "rewards/margins": 2.890686511993408, "rewards/rejected": -2.575836658477783, "step": 17350 }, { "epoch": 0.69, "learning_rate": 1.2925581023791239e-06, "logits/chosen": -3.003260850906372, "logits/rejected": -3.0333356857299805, "logps/chosen": -0.16013266146183014, "logps/rejected": -292.0518798828125, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.3162152171134949, "rewards/margins": 2.8794422149658203, "rewards/rejected": -2.5632271766662598, "step": 17360 }, { "epoch": 0.69, "learning_rate": 1.2895027439822982e-06, "logits/chosen": -3.0184104442596436, "logits/rejected": -3.049426794052124, "logps/chosen": -0.7785434126853943, "logps/rejected": -288.17340087890625, "loss": 0.0584, "rewards/accuracies": 1.0, "rewards/chosen": 0.3072156310081482, "rewards/margins": 2.835972309112549, "rewards/rejected": -2.528756618499756, "step": 17370 }, { "epoch": 0.7, "learning_rate": 1.2864497455118152e-06, "logits/chosen": -3.0174622535705566, "logits/rejected": -3.048150062561035, "logps/chosen": -0.7505964636802673, "logps/rejected": -286.0722961425781, "loss": 0.065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3097427785396576, "rewards/margins": 2.8176016807556152, "rewards/rejected": -2.5078587532043457, "step": 17380 }, { "epoch": 0.7, "learning_rate": 1.2833991129196508e-06, "logits/chosen": -3.0274245738983154, "logits/rejected": -3.05458402633667, "logps/chosen": -3.8615689277648926, "logps/rejected": -286.7967529296875, "loss": 0.0922, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27857908606529236, "rewards/margins": 2.79148268699646, "rewards/rejected": -2.5129036903381348, "step": 17390 }, { "epoch": 0.7, "learning_rate": 1.280350852153168e-06, "logits/chosen": -3.0198700428009033, "logits/rejected": -3.0490806102752686, "logps/chosen": -12.486019134521484, "logps/rejected": -273.7156677246094, "loss": 0.171, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1941261738538742, "rewards/margins": 2.575122594833374, "rewards/rejected": -2.3809967041015625, "step": 17400 }, { "epoch": 0.7, "eval_logits/chosen": -3.0701351165771484, "eval_logits/rejected": -3.0960693359375, "eval_logps/chosen": -0.15764912962913513, "eval_logps/rejected": -284.18212890625, "eval_loss": 0.0604715533554554, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150230348110199, "eval_rewards/margins": 2.7966887950897217, "eval_rewards/rejected": -2.4816653728485107, "eval_runtime": 2.5455, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.393, "step": 17400 }, { "epoch": 0.7, "learning_rate": 1.2773049691551103e-06, "logits/chosen": -3.0003602504730225, "logits/rejected": -3.0354230403900146, "logps/chosen": -3.679553985595703, "logps/rejected": -287.8396911621094, "loss": 0.0902, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2781984210014343, "rewards/margins": 2.8076767921447754, "rewards/rejected": -2.529478073120117, "step": 17410 }, { "epoch": 0.7, "learning_rate": 1.2742614698635784e-06, "logits/chosen": -3.020311117172241, "logits/rejected": -3.0505223274230957, "logps/chosen": -3.6861624717712402, "logps/rejected": -285.9217529296875, "loss": 0.092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28183597326278687, "rewards/margins": 2.7824809551239014, "rewards/rejected": -2.500645160675049, "step": 17420 }, { "epoch": 0.7, "learning_rate": 1.2712203602120326e-06, "logits/chosen": -3.0178279876708984, "logits/rejected": -3.051135301589966, "logps/chosen": -3.0806527137756348, "logps/rejected": -287.06842041015625, "loss": 0.0843, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2858680486679077, "rewards/margins": 2.8045425415039062, "rewards/rejected": -2.518674373626709, "step": 17430 }, { "epoch": 0.7, "learning_rate": 1.2681816461292715e-06, "logits/chosen": -3.016171932220459, "logits/rejected": -3.043241024017334, "logps/chosen": -3.8253567218780518, "logps/rejected": -287.3716735839844, "loss": 0.0916, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27748459577560425, "rewards/margins": 2.8010175228118896, "rewards/rejected": -2.5235331058502197, "step": 17440 }, { "epoch": 0.7, "learning_rate": 1.2651453335394232e-06, "logits/chosen": -3.025444746017456, "logits/rejected": -3.055690050125122, "logps/chosen": -0.20198580622673035, "logps/rejected": -292.1217346191406, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152790367603302, "rewards/margins": 2.881721019744873, "rewards/rejected": -2.5664420127868652, "step": 17450 }, { "epoch": 0.7, "learning_rate": 1.2621114283619345e-06, "logits/chosen": -3.024214267730713, "logits/rejected": -3.0526487827301025, "logps/chosen": -4.610439300537109, "logps/rejected": -283.24456787109375, "loss": 0.084, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2745947539806366, "rewards/margins": 2.749394655227661, "rewards/rejected": -2.4748001098632812, "step": 17460 }, { "epoch": 0.7, "learning_rate": 1.259079936511558e-06, "logits/chosen": -3.0447471141815186, "logits/rejected": -3.0710391998291016, "logps/chosen": -3.6538829803466797, "logps/rejected": -287.7995300292969, "loss": 0.0898, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2801123857498169, "rewards/margins": 2.8047845363616943, "rewards/rejected": -2.524672269821167, "step": 17470 }, { "epoch": 0.7, "learning_rate": 1.2560508638983437e-06, "logits/chosen": -3.0205235481262207, "logits/rejected": -3.049914598464966, "logps/chosen": -5.021552562713623, "logps/rejected": -286.0279846191406, "loss": 0.1042, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26622098684310913, "rewards/margins": 2.769150495529175, "rewards/rejected": -2.5029296875, "step": 17480 }, { "epoch": 0.7, "learning_rate": 1.2530242164276236e-06, "logits/chosen": -3.0343751907348633, "logits/rejected": -3.064020872116089, "logps/chosen": -0.17524774372577667, "logps/rejected": -292.00439453125, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3142052888870239, "rewards/margins": 2.8765039443969727, "rewards/rejected": -2.562298536300659, "step": 17490 }, { "epoch": 0.7, "learning_rate": 1.2500000000000007e-06, "logits/chosen": -2.994042158126831, "logits/rejected": -3.0272774696350098, "logps/chosen": -2.977365255355835, "logps/rejected": -288.88134765625, "loss": 0.0823, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.287405788898468, "rewards/margins": 2.8191628456115723, "rewards/rejected": -2.53175687789917, "step": 17500 }, { "epoch": 0.7, "eval_logits/chosen": -3.069868564605713, "eval_logits/rejected": -3.096381902694702, "eval_logps/chosen": -0.17171546816825867, "eval_logps/rejected": -284.0711669921875, "eval_loss": 0.060625962913036346, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31488236784935, "eval_rewards/margins": 2.795438289642334, "eval_rewards/rejected": -2.480556011199951, "eval_runtime": 2.5357, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 17500 }, { "epoch": 0.7, "learning_rate": 1.24697822051134e-06, "logits/chosen": -3.0171446800231934, "logits/rejected": -3.0443360805511475, "logps/chosen": -8.453685760498047, "logps/rejected": -283.3009338378906, "loss": 0.1361, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23425395786762238, "rewards/margins": 2.7107625007629395, "rewards/rejected": -2.476508140563965, "step": 17510 }, { "epoch": 0.7, "learning_rate": 1.243958883852755e-06, "logits/chosen": -2.9865617752075195, "logits/rejected": -3.0171265602111816, "logps/chosen": -3.7971432209014893, "logps/rejected": -288.0497131347656, "loss": 0.0911, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2772633135318756, "rewards/margins": 2.80602765083313, "rewards/rejected": -2.528764247894287, "step": 17520 }, { "epoch": 0.7, "learning_rate": 1.2409419959105981e-06, "logits/chosen": -2.997295618057251, "logits/rejected": -3.0281479358673096, "logps/chosen": -0.220094233751297, "logps/rejected": -290.34930419921875, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.31485503911972046, "rewards/margins": 2.864657163619995, "rewards/rejected": -2.54980206489563, "step": 17530 }, { "epoch": 0.7, "learning_rate": 1.2379275625664462e-06, "logits/chosen": -3.003645420074463, "logits/rejected": -3.0352540016174316, "logps/chosen": -2.784823417663574, "logps/rejected": -285.883544921875, "loss": 0.082, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28675374388694763, "rewards/margins": 2.791731357574463, "rewards/rejected": -2.5049774646759033, "step": 17540 }, { "epoch": 0.7, "learning_rate": 1.234915589697091e-06, "logits/chosen": -3.0277509689331055, "logits/rejected": -3.055792808532715, "logps/chosen": -0.27920395135879517, "logps/rejected": -292.41546630859375, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3158296048641205, "rewards/margins": 2.883425712585449, "rewards/rejected": -2.567595958709717, "step": 17550 }, { "epoch": 0.7, "learning_rate": 1.2319060831745273e-06, "logits/chosen": -3.0151686668395996, "logits/rejected": -3.0456061363220215, "logps/chosen": -3.782829999923706, "logps/rejected": -288.8480224609375, "loss": 0.0903, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2778281569480896, "rewards/margins": 2.813889265060425, "rewards/rejected": -2.5360610485076904, "step": 17560 }, { "epoch": 0.7, "learning_rate": 1.2288990488659433e-06, "logits/chosen": -3.008955478668213, "logits/rejected": -3.0406298637390137, "logps/chosen": -9.08268928527832, "logps/rejected": -279.57708740234375, "loss": 0.1448, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22433018684387207, "rewards/margins": 2.6651787757873535, "rewards/rejected": -2.4408485889434814, "step": 17570 }, { "epoch": 0.7, "learning_rate": 1.2258944926337057e-06, "logits/chosen": -3.0016794204711914, "logits/rejected": -3.0333971977233887, "logps/chosen": -0.3622663617134094, "logps/rejected": -287.21746826171875, "loss": 0.0606, "rewards/accuracies": 1.0, "rewards/chosen": 0.3124070465564728, "rewards/margins": 2.8293051719665527, "rewards/rejected": -2.5168983936309814, "step": 17580 }, { "epoch": 0.7, "learning_rate": 1.2228924203353507e-06, "logits/chosen": -3.04030179977417, "logits/rejected": -3.0662145614624023, "logps/chosen": -12.916990280151367, "logps/rejected": -276.66497802734375, "loss": 0.1832, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.18972626328468323, "rewards/margins": 2.5974414348602295, "rewards/rejected": -2.407715320587158, "step": 17590 }, { "epoch": 0.7, "learning_rate": 1.2198928378235717e-06, "logits/chosen": -3.0265233516693115, "logits/rejected": -3.053837299346924, "logps/chosen": -2.5578017234802246, "logps/rejected": -288.0620422363281, "loss": 0.0707, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2920084297657013, "rewards/margins": 2.8160481452941895, "rewards/rejected": -2.5240397453308105, "step": 17600 }, { "epoch": 0.7, "eval_logits/chosen": -3.0705387592315674, "eval_logits/rejected": -3.0963964462280273, "eval_logps/chosen": -0.1645336151123047, "eval_logps/rejected": -283.7638854980469, "eval_loss": 0.06079899147152901, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149541914463043, "eval_rewards/margins": 2.7924373149871826, "eval_rewards/rejected": -2.4774832725524902, "eval_runtime": 2.537, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 17600 }, { "epoch": 0.7, "learning_rate": 1.2168957509462074e-06, "logits/chosen": -3.031931161880493, "logits/rejected": -3.060534954071045, "logps/chosen": -0.16888007521629333, "logps/rejected": -293.6887512207031, "loss": 0.0536, "rewards/accuracies": 1.0, "rewards/chosen": 0.3190687596797943, "rewards/margins": 2.9015889167785645, "rewards/rejected": -2.582520008087158, "step": 17610 }, { "epoch": 0.7, "learning_rate": 1.2139011655462338e-06, "logits/chosen": -3.018138885498047, "logits/rejected": -3.0452330112457275, "logps/chosen": -4.761816024780273, "logps/rejected": -284.32122802734375, "loss": 0.1027, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26841533184051514, "rewards/margins": 2.756636619567871, "rewards/rejected": -2.4882214069366455, "step": 17620 }, { "epoch": 0.71, "learning_rate": 1.2109090874617477e-06, "logits/chosen": -3.02954363822937, "logits/rejected": -3.055955648422241, "logps/chosen": -7.071269989013672, "logps/rejected": -285.58966064453125, "loss": 0.1227, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24680697917938232, "rewards/margins": 2.7478301525115967, "rewards/rejected": -2.501023054122925, "step": 17630 }, { "epoch": 0.71, "learning_rate": 1.207919522525958e-06, "logits/chosen": -3.0147576332092285, "logits/rejected": -3.045163631439209, "logps/chosen": -3.1574230194091797, "logps/rejected": -286.58197021484375, "loss": 0.0853, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28600677847862244, "rewards/margins": 2.797990560531616, "rewards/rejected": -2.511983871459961, "step": 17640 }, { "epoch": 0.71, "learning_rate": 1.204932476567175e-06, "logits/chosen": -3.038252592086792, "logits/rejected": -3.068612575531006, "logps/chosen": -0.1889668107032776, "logps/rejected": -288.38836669921875, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": 0.3175138831138611, "rewards/margins": 2.8414371013641357, "rewards/rejected": -2.52392315864563, "step": 17650 }, { "epoch": 0.71, "learning_rate": 1.2019479554087964e-06, "logits/chosen": -3.0133769512176514, "logits/rejected": -3.04252290725708, "logps/chosen": -6.0151495933532715, "logps/rejected": -284.3155212402344, "loss": 0.1123, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2564007341861725, "rewards/margins": 2.743856430053711, "rewards/rejected": -2.4874558448791504, "step": 17660 }, { "epoch": 0.71, "learning_rate": 1.1989659648693017e-06, "logits/chosen": -3.039607286453247, "logits/rejected": -3.067488431930542, "logps/chosen": -0.16867992281913757, "logps/rejected": -290.17681884765625, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3164631426334381, "rewards/margins": 2.862305164337158, "rewards/rejected": -2.545842170715332, "step": 17670 }, { "epoch": 0.71, "learning_rate": 1.1959865107622306e-06, "logits/chosen": -3.012415647506714, "logits/rejected": -3.0410208702087402, "logps/chosen": -5.534210681915283, "logps/rejected": -283.2608947753906, "loss": 0.1082, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26261430978775024, "rewards/margins": 2.7410573959350586, "rewards/rejected": -2.478443145751953, "step": 17680 }, { "epoch": 0.71, "learning_rate": 1.1930095988961837e-06, "logits/chosen": -3.049402952194214, "logits/rejected": -3.0780787467956543, "logps/chosen": -0.17998751997947693, "logps/rejected": -291.1936340332031, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.31581979990005493, "rewards/margins": 2.8688912391662598, "rewards/rejected": -2.5530714988708496, "step": 17690 }, { "epoch": 0.71, "learning_rate": 1.1900352350748026e-06, "logits/chosen": -3.024726390838623, "logits/rejected": -3.0540809631347656, "logps/chosen": -3.8614342212677, "logps/rejected": -289.4422912597656, "loss": 0.0906, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2784929871559143, "rewards/margins": 2.818315267562866, "rewards/rejected": -2.5398223400115967, "step": 17700 }, { "epoch": 0.71, "eval_logits/chosen": -3.0700948238372803, "eval_logits/rejected": -3.096344232559204, "eval_logps/chosen": -0.16018708050251007, "eval_logps/rejected": -283.8846435546875, "eval_loss": 0.06070870906114578, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31499767303466797, "eval_rewards/margins": 2.7936882972717285, "eval_rewards/rejected": -2.4786906242370605, "eval_runtime": 2.5405, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 17700 }, { "epoch": 0.71, "learning_rate": 1.1870634250967606e-06, "logits/chosen": -3.006059169769287, "logits/rejected": -3.036301374435425, "logps/chosen": -0.18650060892105103, "logps/rejected": -289.52191162109375, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31678178906440735, "rewards/margins": 2.858067035675049, "rewards/rejected": -2.5412850379943848, "step": 17710 }, { "epoch": 0.71, "learning_rate": 1.1840941747557557e-06, "logits/chosen": -3.030272960662842, "logits/rejected": -3.0585267543792725, "logps/chosen": -3.5625157356262207, "logps/rejected": -284.7048034667969, "loss": 0.0803, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27796199917793274, "rewards/margins": 2.7740085124969482, "rewards/rejected": -2.496046543121338, "step": 17720 }, { "epoch": 0.71, "learning_rate": 1.1811274898404903e-06, "logits/chosen": -3.007671594619751, "logits/rejected": -3.0366363525390625, "logps/chosen": -6.416603088378906, "logps/rejected": -285.7349548339844, "loss": 0.1161, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2535912096500397, "rewards/margins": 2.7593131065368652, "rewards/rejected": -2.5057220458984375, "step": 17730 }, { "epoch": 0.71, "learning_rate": 1.178163376134671e-06, "logits/chosen": -3.0254738330841064, "logits/rejected": -3.0507724285125732, "logps/chosen": -13.267300605773926, "logps/rejected": -272.3003234863281, "loss": 0.1883, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1846054345369339, "rewards/margins": 2.557551145553589, "rewards/rejected": -2.372945785522461, "step": 17740 }, { "epoch": 0.71, "learning_rate": 1.1752018394169882e-06, "logits/chosen": -3.0295403003692627, "logits/rejected": -3.058835983276367, "logps/chosen": -0.19218403100967407, "logps/rejected": -291.0135192871094, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.3129698634147644, "rewards/margins": 2.873974561691284, "rewards/rejected": -2.561005115509033, "step": 17750 }, { "epoch": 0.71, "learning_rate": 1.172242885461109e-06, "logits/chosen": -3.0160834789276123, "logits/rejected": -3.04573917388916, "logps/chosen": -2.1561834812164307, "logps/rejected": -290.26837158203125, "loss": 0.0737, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29432740807533264, "rewards/margins": 2.8473989963531494, "rewards/rejected": -2.5530714988708496, "step": 17760 }, { "epoch": 0.71, "learning_rate": 1.169286520035666e-06, "logits/chosen": -3.0347981452941895, "logits/rejected": -3.0639560222625732, "logps/chosen": -0.5833526849746704, "logps/rejected": -289.4736633300781, "loss": 0.0615, "rewards/accuracies": 1.0, "rewards/chosen": 0.3136759400367737, "rewards/margins": 2.8491501808166504, "rewards/rejected": -2.5354745388031006, "step": 17770 }, { "epoch": 0.71, "learning_rate": 1.1663327489042436e-06, "logits/chosen": -3.0033655166625977, "logits/rejected": -3.0343270301818848, "logps/chosen": -9.185164451599121, "logps/rejected": -279.4400939941406, "loss": 0.1431, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22435668110847473, "rewards/margins": 2.6693108081817627, "rewards/rejected": -2.4449539184570312, "step": 17780 }, { "epoch": 0.71, "learning_rate": 1.1633815778253721e-06, "logits/chosen": -2.976332187652588, "logits/rejected": -3.0080533027648926, "logps/chosen": -5.051523208618164, "logps/rejected": -285.2135009765625, "loss": 0.0899, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26493415236473083, "rewards/margins": 2.7648098468780518, "rewards/rejected": -2.499875545501709, "step": 17790 }, { "epoch": 0.71, "learning_rate": 1.160433012552508e-06, "logits/chosen": -3.0330421924591064, "logits/rejected": -3.060859441757202, "logps/chosen": -0.3362041413784027, "logps/rejected": -287.8753662109375, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/chosen": 0.31414178013801575, "rewards/margins": 2.836557626724243, "rewards/rejected": -2.52241587638855, "step": 17800 }, { "epoch": 0.71, "eval_logits/chosen": -3.0692496299743652, "eval_logits/rejected": -3.0949594974517822, "eval_logps/chosen": -0.18050143122673035, "eval_logps/rejected": -284.12994384765625, "eval_loss": 0.06055053323507309, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31479454040527344, "eval_rewards/margins": 2.795938491821289, "eval_rewards/rejected": -2.4811437129974365, "eval_runtime": 2.5505, "eval_samples_per_second": 1.96, "eval_steps_per_second": 0.392, "step": 17800 }, { "epoch": 0.71, "learning_rate": 1.1574870588340333e-06, "logits/chosen": -3.022905111312866, "logits/rejected": -3.0509836673736572, "logps/chosen": -6.167815208435059, "logps/rejected": -283.4917297363281, "loss": 0.1159, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25541284680366516, "rewards/margins": 2.738121271133423, "rewards/rejected": -2.48270845413208, "step": 17810 }, { "epoch": 0.71, "learning_rate": 1.154543722413232e-06, "logits/chosen": -3.0147318840026855, "logits/rejected": -3.0442495346069336, "logps/chosen": -2.9443163871765137, "logps/rejected": -287.8927307128906, "loss": 0.0819, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2919546365737915, "rewards/margins": 2.814587354660034, "rewards/rejected": -2.522632598876953, "step": 17820 }, { "epoch": 0.71, "learning_rate": 1.1516030090282915e-06, "logits/chosen": -2.99389910697937, "logits/rejected": -3.0264933109283447, "logps/chosen": -3.064410924911499, "logps/rejected": -286.88885498046875, "loss": 0.0846, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2850993871688843, "rewards/margins": 2.8041043281555176, "rewards/rejected": -2.5190048217773438, "step": 17830 }, { "epoch": 0.71, "learning_rate": 1.1486649244122824e-06, "logits/chosen": -3.0401458740234375, "logits/rejected": -3.0682830810546875, "logps/chosen": -0.36318135261535645, "logps/rejected": -290.6753845214844, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3114040195941925, "rewards/margins": 2.8666958808898926, "rewards/rejected": -2.5552916526794434, "step": 17840 }, { "epoch": 0.71, "learning_rate": 1.1457294742931508e-06, "logits/chosen": -3.037119150161743, "logits/rejected": -3.0665767192840576, "logps/chosen": -0.7179316878318787, "logps/rejected": -289.11102294921875, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.31264030933380127, "rewards/margins": 2.84346342086792, "rewards/rejected": -2.530822992324829, "step": 17850 }, { "epoch": 0.71, "learning_rate": 1.142796664393707e-06, "logits/chosen": -3.02073073387146, "logits/rejected": -3.0514626502990723, "logps/chosen": -3.784738063812256, "logps/rejected": -286.35589599609375, "loss": 0.0927, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2814430892467499, "rewards/margins": 2.7851312160491943, "rewards/rejected": -2.503688335418701, "step": 17860 }, { "epoch": 0.71, "learning_rate": 1.1398665004316127e-06, "logits/chosen": -3.0299909114837646, "logits/rejected": -3.0584559440612793, "logps/chosen": -3.484889507293701, "logps/rejected": -286.43377685546875, "loss": 0.0874, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2822599411010742, "rewards/margins": 2.797910690307617, "rewards/rejected": -2.515650510787964, "step": 17870 }, { "epoch": 0.72, "learning_rate": 1.136938988119375e-06, "logits/chosen": -3.045070171356201, "logits/rejected": -3.0682883262634277, "logps/chosen": -13.07519817352295, "logps/rejected": -275.9333190917969, "loss": 0.1867, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1849183738231659, "rewards/margins": 2.5892200469970703, "rewards/rejected": -2.404301404953003, "step": 17880 }, { "epoch": 0.72, "learning_rate": 1.1340141331643276e-06, "logits/chosen": -3.0217125415802, "logits/rejected": -3.0514352321624756, "logps/chosen": -3.752772569656372, "logps/rejected": -286.1201171875, "loss": 0.0938, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2787559926509857, "rewards/margins": 2.7879700660705566, "rewards/rejected": -2.509213924407959, "step": 17890 }, { "epoch": 0.72, "learning_rate": 1.1310919412686248e-06, "logits/chosen": -3.0283396244049072, "logits/rejected": -3.058566093444824, "logps/chosen": -0.16336818039417267, "logps/rejected": -291.8830871582031, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.31701403856277466, "rewards/margins": 2.8820395469665527, "rewards/rejected": -2.5650253295898438, "step": 17900 }, { "epoch": 0.72, "eval_logits/chosen": -3.070828437805176, "eval_logits/rejected": -3.0954182147979736, "eval_logps/chosen": -0.17641010880470276, "eval_logps/rejected": -284.35089111328125, "eval_loss": 0.06032929569482803, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31483545899391174, "eval_rewards/margins": 2.7981884479522705, "eval_rewards/rejected": -2.4833531379699707, "eval_runtime": 2.5459, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.393, "step": 17900 }, { "epoch": 0.72, "learning_rate": 1.1281724181292294e-06, "logits/chosen": -3.0184805393218994, "logits/rejected": -3.047398090362549, "logps/chosen": -4.167956352233887, "logps/rejected": -281.1507873535156, "loss": 0.0983, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.277924120426178, "rewards/margins": 2.7345807552337646, "rewards/rejected": -2.4566566944122314, "step": 17910 }, { "epoch": 0.72, "learning_rate": 1.1252555694379005e-06, "logits/chosen": -3.006788969039917, "logits/rejected": -3.036930561065674, "logps/chosen": -3.7218146324157715, "logps/rejected": -288.65411376953125, "loss": 0.0901, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2798632085323334, "rewards/margins": 2.812648296356201, "rewards/rejected": -2.532785177230835, "step": 17920 }, { "epoch": 0.72, "learning_rate": 1.122341400881185e-06, "logits/chosen": -3.0280110836029053, "logits/rejected": -3.0550129413604736, "logps/chosen": -10.211143493652344, "logps/rejected": -279.87091064453125, "loss": 0.1547, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21516521275043488, "rewards/margins": 2.6608328819274902, "rewards/rejected": -2.4456679821014404, "step": 17930 }, { "epoch": 0.72, "learning_rate": 1.1194299181404037e-06, "logits/chosen": -3.0126967430114746, "logits/rejected": -3.042933940887451, "logps/chosen": -2.931779384613037, "logps/rejected": -286.17529296875, "loss": 0.0835, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28721731901168823, "rewards/margins": 2.7968688011169434, "rewards/rejected": -2.5096516609191895, "step": 17940 }, { "epoch": 0.72, "learning_rate": 1.11652112689164e-06, "logits/chosen": -3.009639263153076, "logits/rejected": -3.0413448810577393, "logps/chosen": -0.21254651248455048, "logps/rejected": -291.7432556152344, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3163946568965912, "rewards/margins": 2.8758862018585205, "rewards/rejected": -2.5594913959503174, "step": 17950 }, { "epoch": 0.72, "learning_rate": 1.1136150328057324e-06, "logits/chosen": -3.0408642292022705, "logits/rejected": -3.0686779022216797, "logps/chosen": -6.428332328796387, "logps/rejected": -284.0466613769531, "loss": 0.1178, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25203365087509155, "rewards/margins": 2.7398860454559326, "rewards/rejected": -2.4878525733947754, "step": 17960 }, { "epoch": 0.72, "learning_rate": 1.1107116415482586e-06, "logits/chosen": -2.996800184249878, "logits/rejected": -3.026928186416626, "logps/chosen": -3.9025561809539795, "logps/rejected": -287.47894287109375, "loss": 0.0923, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2769092917442322, "rewards/margins": 2.796950101852417, "rewards/rejected": -2.520040512084961, "step": 17970 }, { "epoch": 0.72, "learning_rate": 1.1078109587795311e-06, "logits/chosen": -3.0139122009277344, "logits/rejected": -3.044419765472412, "logps/chosen": -3.7232868671417236, "logps/rejected": -288.37127685546875, "loss": 0.0902, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27956047654151917, "rewards/margins": 2.81042218208313, "rewards/rejected": -2.5308616161346436, "step": 17980 }, { "epoch": 0.72, "learning_rate": 1.1049129901545756e-06, "logits/chosen": -3.041494846343994, "logits/rejected": -3.06948184967041, "logps/chosen": -3.845111131668091, "logps/rejected": -287.0648498535156, "loss": 0.0924, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2785777449607849, "rewards/margins": 2.792520046234131, "rewards/rejected": -2.513942241668701, "step": 17990 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -3.0181374549865723, "logits/rejected": -3.0479931831359863, "logps/chosen": -0.23106679320335388, "logps/rejected": -291.03680419921875, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3109726905822754, "rewards/margins": 2.870452404022217, "rewards/rejected": -2.5594799518585205, "step": 18000 }, { "epoch": 0.72, "eval_logits/chosen": -3.0701212882995605, "eval_logits/rejected": -3.0963051319122314, "eval_logps/chosen": -0.1813810020685196, "eval_logps/rejected": -284.40594482421875, "eval_loss": 0.060288310050964355, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147857189178467, "eval_rewards/margins": 2.798689126968384, "eval_rewards/rejected": -2.483903408050537, "eval_runtime": 2.5356, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 18000 }, { "epoch": 0.72, "learning_rate": 1.0991252179296389e-06, "logits/chosen": -3.019570827484131, "logits/rejected": -3.046766757965088, "logps/chosen": -4.419492721557617, "logps/rejected": -280.723388671875, "loss": 0.1039, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2768820822238922, "rewards/margins": 2.7264132499694824, "rewards/rejected": -2.449531078338623, "step": 18010 }, { "epoch": 0.72, "learning_rate": 1.096235425613214e-06, "logits/chosen": -3.035086154937744, "logits/rejected": -3.0642991065979004, "logps/chosen": -0.3831818103790283, "logps/rejected": -287.6547546386719, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": 0.31128790974617004, "rewards/margins": 2.8374340534210205, "rewards/rejected": -2.526146173477173, "step": 18020 }, { "epoch": 0.72, "learning_rate": 1.0933483700076592e-06, "logits/chosen": -3.005786418914795, "logits/rejected": -3.0375866889953613, "logps/chosen": -1.5350369215011597, "logps/rejected": -289.18841552734375, "loss": 0.0682, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30338162183761597, "rewards/margins": 2.832115888595581, "rewards/rejected": -2.528733968734741, "step": 18030 }, { "epoch": 0.72, "learning_rate": 1.0904640567414332e-06, "logits/chosen": -3.0247623920440674, "logits/rejected": -3.054403066635132, "logps/chosen": -0.24790045619010925, "logps/rejected": -290.55865478515625, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.31421953439712524, "rewards/margins": 2.864478588104248, "rewards/rejected": -2.5502593517303467, "step": 18040 }, { "epoch": 0.72, "learning_rate": 1.0875824914376555e-06, "logits/chosen": -3.009660243988037, "logits/rejected": -3.0450847148895264, "logps/chosen": -0.22878777980804443, "logps/rejected": -291.78533935546875, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3119354844093323, "rewards/margins": 2.876788854598999, "rewards/rejected": -2.5648531913757324, "step": 18050 }, { "epoch": 0.72, "learning_rate": 1.0847036797140832e-06, "logits/chosen": -3.032517910003662, "logits/rejected": -3.061767816543579, "logps/chosen": -12.267068862915039, "logps/rejected": -276.33367919921875, "loss": 0.1769, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1929825246334076, "rewards/margins": 2.604436159133911, "rewards/rejected": -2.4114532470703125, "step": 18060 }, { "epoch": 0.72, "learning_rate": 1.0818276271831094e-06, "logits/chosen": -3.009683847427368, "logits/rejected": -3.0380122661590576, "logps/chosen": -0.18388177454471588, "logps/rejected": -293.1788024902344, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 0.31384700536727905, "rewards/margins": 2.8936774730682373, "rewards/rejected": -2.5798306465148926, "step": 18070 }, { "epoch": 0.72, "learning_rate": 1.0789543394517434e-06, "logits/chosen": -3.0112154483795166, "logits/rejected": -3.043229818344116, "logps/chosen": -2.6787972450256348, "logps/rejected": -288.89093017578125, "loss": 0.0795, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29166507720947266, "rewards/margins": 2.8232178688049316, "rewards/rejected": -2.531552791595459, "step": 18080 }, { "epoch": 0.72, "learning_rate": 1.0760838221216065e-06, "logits/chosen": -3.0113792419433594, "logits/rejected": -3.0412585735321045, "logps/chosen": -3.5637881755828857, "logps/rejected": -285.397216796875, "loss": 0.0899, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27982985973358154, "rewards/margins": 2.781996250152588, "rewards/rejected": -2.502166271209717, "step": 18090 }, { "epoch": 0.72, "learning_rate": 1.073216080788921e-06, "logits/chosen": -3.042724132537842, "logits/rejected": -3.0716745853424072, "logps/chosen": -0.18502798676490784, "logps/rejected": -291.85955810546875, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.31500083208084106, "rewards/margins": 2.877103090286255, "rewards/rejected": -2.5621023178100586, "step": 18100 }, { "epoch": 0.72, "eval_logits/chosen": -3.070185422897339, "eval_logits/rejected": -3.095249891281128, "eval_logps/chosen": -0.18546631932258606, "eval_logps/rejected": -284.35992431640625, "eval_loss": 0.06034581735730171, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31474485993385315, "eval_rewards/margins": 2.7981882095336914, "eval_rewards/rejected": -2.48344349861145, "eval_runtime": 2.5416, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 18100 }, { "epoch": 0.72, "learning_rate": 1.0703511210444936e-06, "logits/chosen": -2.9874818325042725, "logits/rejected": -3.0163261890411377, "logps/chosen": -11.583921432495117, "logps/rejected": -276.99908447265625, "loss": 0.1706, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20227082073688507, "rewards/margins": 2.6210427284240723, "rewards/rejected": -2.418771505355835, "step": 18110 }, { "epoch": 0.72, "learning_rate": 1.0674889484737126e-06, "logits/chosen": -3.027702808380127, "logits/rejected": -3.05340576171875, "logps/chosen": -6.655022621154785, "logps/rejected": -283.48651123046875, "loss": 0.1188, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24843525886535645, "rewards/margins": 2.7343497276306152, "rewards/rejected": -2.485914707183838, "step": 18120 }, { "epoch": 0.73, "learning_rate": 1.0646295686565258e-06, "logits/chosen": -2.997488498687744, "logits/rejected": -3.0279698371887207, "logps/chosen": -0.24963533878326416, "logps/rejected": -290.31329345703125, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.3136153221130371, "rewards/margins": 2.8607630729675293, "rewards/rejected": -2.547147750854492, "step": 18130 }, { "epoch": 0.73, "learning_rate": 1.0617729871674437e-06, "logits/chosen": -3.0382144451141357, "logits/rejected": -3.066965103149414, "logps/chosen": -0.574134349822998, "logps/rejected": -289.58416748046875, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": 0.31459373235702515, "rewards/margins": 2.852489948272705, "rewards/rejected": -2.5378963947296143, "step": 18140 }, { "epoch": 0.73, "learning_rate": 1.0589192095755172e-06, "logits/chosen": -2.9993948936462402, "logits/rejected": -3.0283782482147217, "logps/chosen": -8.09428882598877, "logps/rejected": -273.03460693359375, "loss": 0.1432, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23321764171123505, "rewards/margins": 2.613830327987671, "rewards/rejected": -2.380612850189209, "step": 18150 }, { "epoch": 0.73, "learning_rate": 1.0560682414443315e-06, "logits/chosen": -3.020810604095459, "logits/rejected": -3.048011541366577, "logps/chosen": -7.471196174621582, "logps/rejected": -277.9347229003906, "loss": 0.1247, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24378180503845215, "rewards/margins": 2.6675074100494385, "rewards/rejected": -2.4237256050109863, "step": 18160 }, { "epoch": 0.73, "learning_rate": 1.053220088331995e-06, "logits/chosen": -2.971193313598633, "logits/rejected": -3.0026378631591797, "logps/chosen": -4.442770957946777, "logps/rejected": -287.07598876953125, "loss": 0.0847, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26557987928390503, "rewards/margins": 2.7894530296325684, "rewards/rejected": -2.5238733291625977, "step": 18170 }, { "epoch": 0.73, "learning_rate": 1.050374755791127e-06, "logits/chosen": -2.986992359161377, "logits/rejected": -3.0178561210632324, "logps/chosen": -3.849078416824341, "logps/rejected": -286.2798767089844, "loss": 0.0927, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2784402072429657, "rewards/margins": 2.786194324493408, "rewards/rejected": -2.507753849029541, "step": 18180 }, { "epoch": 0.73, "learning_rate": 1.0475322493688506e-06, "logits/chosen": -3.0357797145843506, "logits/rejected": -3.062098979949951, "logps/chosen": -9.090161323547363, "logps/rejected": -279.96746826171875, "loss": 0.1421, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2256331443786621, "rewards/margins": 2.6727728843688965, "rewards/rejected": -2.4471402168273926, "step": 18190 }, { "epoch": 0.73, "learning_rate": 1.0446925746067768e-06, "logits/chosen": -2.9920246601104736, "logits/rejected": -3.0236947536468506, "logps/chosen": -2.237553119659424, "logps/rejected": -289.3996887207031, "loss": 0.0688, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2924205958843231, "rewards/margins": 2.8356525897979736, "rewards/rejected": -2.54323148727417, "step": 18200 }, { "epoch": 0.73, "eval_logits/chosen": -3.0694212913513184, "eval_logits/rejected": -3.0953664779663086, "eval_logps/chosen": -0.21460480988025665, "eval_logps/rejected": -284.41412353515625, "eval_loss": 0.06032438203692436, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31445345282554626, "eval_rewards/margins": 2.7984390258789062, "eval_rewards/rejected": -2.483985424041748, "eval_runtime": 2.5391, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 18200 }, { "epoch": 0.73, "learning_rate": 1.0418557370409966e-06, "logits/chosen": -2.990868330001831, "logits/rejected": -3.023829936981201, "logps/chosen": -3.032667875289917, "logps/rejected": -288.2789001464844, "loss": 0.0833, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28547877073287964, "rewards/margins": 2.814556121826172, "rewards/rejected": -2.5290775299072266, "step": 18210 }, { "epoch": 0.73, "learning_rate": 1.03902174220207e-06, "logits/chosen": -3.0012898445129395, "logits/rejected": -3.032557964324951, "logps/chosen": -3.931950092315674, "logps/rejected": -287.1196594238281, "loss": 0.0921, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27752476930618286, "rewards/margins": 2.79292368888855, "rewards/rejected": -2.515399217605591, "step": 18220 }, { "epoch": 0.73, "learning_rate": 1.0361905956150147e-06, "logits/chosen": -3.0316336154937744, "logits/rejected": -3.062131643295288, "logps/chosen": -0.4235619008541107, "logps/rejected": -291.6737365722656, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.312721312046051, "rewards/margins": 2.876495599746704, "rewards/rejected": -2.563774585723877, "step": 18230 }, { "epoch": 0.73, "learning_rate": 1.033362302799297e-06, "logits/chosen": -3.0046558380126953, "logits/rejected": -3.0323569774627686, "logps/chosen": -8.611973762512207, "logps/rejected": -279.75494384765625, "loss": 0.1399, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2312798798084259, "rewards/margins": 2.6753432750701904, "rewards/rejected": -2.444063186645508, "step": 18240 }, { "epoch": 0.73, "learning_rate": 1.0305368692688175e-06, "logits/chosen": -3.0241336822509766, "logits/rejected": -3.053007125854492, "logps/chosen": -10.126974105834961, "logps/rejected": -280.5813903808594, "loss": 0.1526, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2144370973110199, "rewards/margins": 2.66756272315979, "rewards/rejected": -2.453125476837158, "step": 18250 }, { "epoch": 0.73, "learning_rate": 1.0277143005319038e-06, "logits/chosen": -3.0164337158203125, "logits/rejected": -3.0459494590759277, "logps/chosen": -3.8624775409698486, "logps/rejected": -287.22296142578125, "loss": 0.0916, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2776152491569519, "rewards/margins": 2.7964653968811035, "rewards/rejected": -2.518850088119507, "step": 18260 }, { "epoch": 0.73, "learning_rate": 1.024894602091298e-06, "logits/chosen": -3.00402569770813, "logits/rejected": -3.0334529876708984, "logps/chosen": -0.18792107701301575, "logps/rejected": -291.53656005859375, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.3141048550605774, "rewards/margins": 2.878382444381714, "rewards/rejected": -2.5642781257629395, "step": 18270 }, { "epoch": 0.73, "learning_rate": 1.022077779444145e-06, "logits/chosen": -3.0283291339874268, "logits/rejected": -3.056265354156494, "logps/chosen": -3.4274089336395264, "logps/rejected": -286.6980895996094, "loss": 0.0867, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28145235776901245, "rewards/margins": 2.795799493789673, "rewards/rejected": -2.5143473148345947, "step": 18280 }, { "epoch": 0.73, "learning_rate": 1.0192638380819884e-06, "logits/chosen": -3.007323741912842, "logits/rejected": -3.040466070175171, "logps/chosen": -0.2158750742673874, "logps/rejected": -290.990234375, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3133971095085144, "rewards/margins": 2.86775803565979, "rewards/rejected": -2.554360866546631, "step": 18290 }, { "epoch": 0.73, "learning_rate": 1.0164527834907468e-06, "logits/chosen": -3.014647960662842, "logits/rejected": -3.0438199043273926, "logps/chosen": -3.6094226837158203, "logps/rejected": -286.7159729003906, "loss": 0.0897, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28150445222854614, "rewards/margins": 2.7966067790985107, "rewards/rejected": -2.5151021480560303, "step": 18300 }, { "epoch": 0.73, "eval_logits/chosen": -3.07033634185791, "eval_logits/rejected": -3.0961482524871826, "eval_logps/chosen": -0.1675456315279007, "eval_logps/rejected": -284.24200439453125, "eval_loss": 0.06041312217712402, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31492406129837036, "eval_rewards/margins": 2.7971878051757812, "eval_rewards/rejected": -2.4822640419006348, "eval_runtime": 2.5418, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 18300 }, { "epoch": 0.73, "learning_rate": 1.0136446211507175e-06, "logits/chosen": -3.0104598999023438, "logits/rejected": -3.040538787841797, "logps/chosen": -3.682678699493408, "logps/rejected": -287.1846008300781, "loss": 0.0904, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.276691198348999, "rewards/margins": 2.7929792404174805, "rewards/rejected": -2.5162882804870605, "step": 18310 }, { "epoch": 0.73, "learning_rate": 1.010839356536555e-06, "logits/chosen": -3.0141923427581787, "logits/rejected": -3.04468035697937, "logps/chosen": -2.222313404083252, "logps/rejected": -286.20135498046875, "loss": 0.075, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29385918378829956, "rewards/margins": 2.800011396408081, "rewards/rejected": -2.506152391433716, "step": 18320 }, { "epoch": 0.73, "learning_rate": 1.008036995117268e-06, "logits/chosen": -3.0110127925872803, "logits/rejected": -3.042562961578369, "logps/chosen": -0.24625520408153534, "logps/rejected": -291.14422607421875, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3136373460292816, "rewards/margins": 2.872641086578369, "rewards/rejected": -2.5590033531188965, "step": 18330 }, { "epoch": 0.73, "learning_rate": 1.0052375423562038e-06, "logits/chosen": -3.0042901039123535, "logits/rejected": -3.034221649169922, "logps/chosen": -6.760244846343994, "logps/rejected": -285.00286865234375, "loss": 0.12, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24926066398620605, "rewards/margins": 2.74674654006958, "rewards/rejected": -2.497485637664795, "step": 18340 }, { "epoch": 0.73, "learning_rate": 1.0024410037110358e-06, "logits/chosen": -3.0299911499023438, "logits/rejected": -3.056769847869873, "logps/chosen": -6.730879306793213, "logps/rejected": -281.36590576171875, "loss": 0.1224, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25046688318252563, "rewards/margins": 2.7071802616119385, "rewards/rejected": -2.4567131996154785, "step": 18350 }, { "epoch": 0.73, "learning_rate": 9.996473846337616e-07, "logits/chosen": -2.9957971572875977, "logits/rejected": -3.026576519012451, "logps/chosen": -3.525038480758667, "logps/rejected": -287.498046875, "loss": 0.0885, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2794782519340515, "rewards/margins": 2.803764820098877, "rewards/rejected": -2.5242867469787598, "step": 18360 }, { "epoch": 0.73, "learning_rate": 9.968566905706833e-07, "logits/chosen": -2.9961211681365967, "logits/rejected": -3.023740291595459, "logps/chosen": -3.8269591331481934, "logps/rejected": -289.10626220703125, "loss": 0.0908, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.279629647731781, "rewards/margins": 2.8177132606506348, "rewards/rejected": -2.538083553314209, "step": 18370 }, { "epoch": 0.74, "learning_rate": 9.94068926962404e-07, "logits/chosen": -3.0265116691589355, "logits/rejected": -3.0554182529449463, "logps/chosen": -3.867600917816162, "logps/rejected": -285.74676513671875, "loss": 0.0929, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.279387503862381, "rewards/margins": 2.782639741897583, "rewards/rejected": -2.5032522678375244, "step": 18380 }, { "epoch": 0.74, "learning_rate": 9.912840992438087e-07, "logits/chosen": -3.028932809829712, "logits/rejected": -3.0566763877868652, "logps/chosen": -1.026984453201294, "logps/rejected": -287.72918701171875, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": 0.307380735874176, "rewards/margins": 2.83111310005188, "rewards/rejected": -2.5237321853637695, "step": 18390 }, { "epoch": 0.74, "learning_rate": 9.88502212844063e-07, "logits/chosen": -2.999518871307373, "logits/rejected": -3.0301194190979004, "logps/chosen": -5.7834367752075195, "logps/rejected": -283.17431640625, "loss": 0.1115, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2601511478424072, "rewards/margins": 2.73486590385437, "rewards/rejected": -2.474714756011963, "step": 18400 }, { "epoch": 0.74, "eval_logits/chosen": -3.0702621936798096, "eval_logits/rejected": -3.0952365398406982, "eval_logps/chosen": -0.18827712535858154, "eval_logps/rejected": -284.28778076171875, "eval_loss": 0.060392607003450394, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31471675634384155, "eval_rewards/margins": 2.797438859939575, "eval_rewards/rejected": -2.482722043991089, "eval_runtime": 2.5413, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 18400 }, { "epoch": 0.74, "learning_rate": 9.857232731865967e-07, "logits/chosen": -3.002856731414795, "logits/rejected": -3.033047676086426, "logps/chosen": -3.8975632190704346, "logps/rejected": -287.66119384765625, "loss": 0.0922, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.276100218296051, "rewards/margins": 2.8008792400360107, "rewards/rejected": -2.5247790813446045, "step": 18410 }, { "epoch": 0.74, "learning_rate": 9.829472856890942e-07, "logits/chosen": -3.0066335201263428, "logits/rejected": -3.039005756378174, "logps/chosen": -0.20065505802631378, "logps/rejected": -290.40020751953125, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3147261142730713, "rewards/margins": 2.8617491722106934, "rewards/rejected": -2.547023057937622, "step": 18420 }, { "epoch": 0.74, "learning_rate": 9.801742557634872e-07, "logits/chosen": -3.0025134086608887, "logits/rejected": -3.034161329269409, "logps/chosen": -2.3191959857940674, "logps/rejected": -287.7107238769531, "loss": 0.063, "rewards/accuracies": 1.0, "rewards/chosen": 0.2909829020500183, "rewards/margins": 2.816981554031372, "rewards/rejected": -2.525998592376709, "step": 18430 }, { "epoch": 0.74, "learning_rate": 9.774041888159364e-07, "logits/chosen": -3.026900053024292, "logits/rejected": -3.0551161766052246, "logps/chosen": -3.6661765575408936, "logps/rejected": -287.5198974609375, "loss": 0.0891, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28183409571647644, "rewards/margins": 2.798574209213257, "rewards/rejected": -2.516740083694458, "step": 18440 }, { "epoch": 0.74, "learning_rate": 9.746370902468311e-07, "logits/chosen": -2.9770078659057617, "logits/rejected": -3.00905179977417, "logps/chosen": -0.21021881699562073, "logps/rejected": -292.0613708496094, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138294816017151, "rewards/margins": 2.882002353668213, "rewards/rejected": -2.5681731700897217, "step": 18450 }, { "epoch": 0.74, "learning_rate": 9.718729654507713e-07, "logits/chosen": -3.0198214054107666, "logits/rejected": -3.048224925994873, "logps/chosen": -3.696220874786377, "logps/rejected": -287.3160095214844, "loss": 0.09, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2805923521518707, "rewards/margins": 2.7970974445343018, "rewards/rejected": -2.516505241394043, "step": 18460 }, { "epoch": 0.74, "learning_rate": 9.691118198165594e-07, "logits/chosen": -3.035858154296875, "logits/rejected": -3.063655376434326, "logps/chosen": -5.492538928985596, "logps/rejected": -285.0615234375, "loss": 0.1061, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2614666819572449, "rewards/margins": 2.7602744102478027, "rewards/rejected": -2.498807430267334, "step": 18470 }, { "epoch": 0.74, "learning_rate": 9.6635365872719e-07, "logits/chosen": -3.019385576248169, "logits/rejected": -3.047001361846924, "logps/chosen": -5.429821968078613, "logps/rejected": -285.2998962402344, "loss": 0.1072, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26184582710266113, "rewards/margins": 2.7607221603393555, "rewards/rejected": -2.498875856399536, "step": 18480 }, { "epoch": 0.74, "learning_rate": 9.63598487559839e-07, "logits/chosen": -3.0225958824157715, "logits/rejected": -3.052712917327881, "logps/chosen": -3.1339659690856934, "logps/rejected": -287.78594970703125, "loss": 0.0839, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2804586589336395, "rewards/margins": 2.8148012161254883, "rewards/rejected": -2.534342050552368, "step": 18490 }, { "epoch": 0.74, "learning_rate": 9.608463116858544e-07, "logits/chosen": -3.0150060653686523, "logits/rejected": -3.0427260398864746, "logps/chosen": -3.8823509216308594, "logps/rejected": -287.8819274902344, "loss": 0.0914, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2792535722255707, "rewards/margins": 2.8031516075134277, "rewards/rejected": -2.523898124694824, "step": 18500 }, { "epoch": 0.74, "eval_logits/chosen": -3.0711543560028076, "eval_logits/rejected": -3.0966880321502686, "eval_logps/chosen": -0.17579111456871033, "eval_logps/rejected": -284.4002990722656, "eval_loss": 0.0603189580142498, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.314841628074646, "eval_rewards/margins": 2.7986886501312256, "eval_rewards/rejected": -2.483847141265869, "eval_runtime": 2.5415, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 18500 }, { "epoch": 0.74, "learning_rate": 9.580971364707438e-07, "logits/chosen": -3.0283586978912354, "logits/rejected": -3.0581161975860596, "logps/chosen": -4.949110984802246, "logps/rejected": -283.88421630859375, "loss": 0.0958, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26699572801589966, "rewards/margins": 2.752119541168213, "rewards/rejected": -2.485124111175537, "step": 18510 }, { "epoch": 0.74, "learning_rate": 9.553509672741646e-07, "logits/chosen": -3.017213821411133, "logits/rejected": -3.0475430488586426, "logps/chosen": -2.1062674522399902, "logps/rejected": -287.70806884765625, "loss": 0.0616, "rewards/accuracies": 1.0, "rewards/chosen": 0.2948290705680847, "rewards/margins": 2.818045139312744, "rewards/rejected": -2.5232157707214355, "step": 18520 }, { "epoch": 0.74, "learning_rate": 9.526078094499142e-07, "logits/chosen": -3.0022170543670654, "logits/rejected": -3.0330803394317627, "logps/chosen": -5.650944709777832, "logps/rejected": -285.8404846191406, "loss": 0.109, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2565653920173645, "rewards/margins": 2.7668073177337646, "rewards/rejected": -2.510241985321045, "step": 18530 }, { "epoch": 0.74, "learning_rate": 9.498676683459184e-07, "logits/chosen": -3.021787166595459, "logits/rejected": -3.0510265827178955, "logps/chosen": -6.0084357261657715, "logps/rejected": -283.72186279296875, "loss": 0.1134, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.257835328578949, "rewards/margins": 2.738915205001831, "rewards/rejected": -2.481079578399658, "step": 18540 }, { "epoch": 0.74, "learning_rate": 9.471305493042243e-07, "logits/chosen": -2.996284008026123, "logits/rejected": -3.0266754627227783, "logps/chosen": -2.609562397003174, "logps/rejected": -286.69366455078125, "loss": 0.0765, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2877083718776703, "rewards/margins": 2.8050990104675293, "rewards/rejected": -2.517390251159668, "step": 18550 }, { "epoch": 0.74, "learning_rate": 9.443964576609844e-07, "logits/chosen": -3.0037875175476074, "logits/rejected": -3.0343992710113525, "logps/chosen": -3.799706220626831, "logps/rejected": -285.6311340332031, "loss": 0.0923, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27947309613227844, "rewards/margins": 2.7781665325164795, "rewards/rejected": -2.4986937046051025, "step": 18560 }, { "epoch": 0.74, "learning_rate": 9.416653987464503e-07, "logits/chosen": -3.0037460327148438, "logits/rejected": -3.0352160930633545, "logps/chosen": -1.5926003456115723, "logps/rejected": -291.6597595214844, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": 0.2991963326931, "rewards/margins": 2.8655145168304443, "rewards/rejected": -2.5663180351257324, "step": 18570 }, { "epoch": 0.74, "learning_rate": 9.389373778849612e-07, "logits/chosen": -3.0284500122070312, "logits/rejected": -3.0568902492523193, "logps/chosen": -3.9588050842285156, "logps/rejected": -285.60614013671875, "loss": 0.0944, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2779909074306488, "rewards/margins": 2.778470754623413, "rewards/rejected": -2.5004801750183105, "step": 18580 }, { "epoch": 0.74, "learning_rate": 9.362124003949324e-07, "logits/chosen": -2.9990005493164062, "logits/rejected": -3.0296666622161865, "logps/chosen": -0.18185552954673767, "logps/rejected": -292.4080505371094, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.314196914434433, "rewards/margins": 2.882596254348755, "rewards/rejected": -2.56839919090271, "step": 18590 }, { "epoch": 0.74, "learning_rate": 9.334904715888496e-07, "logits/chosen": -3.0210964679718018, "logits/rejected": -3.050199031829834, "logps/chosen": -3.8641231060028076, "logps/rejected": -287.19964599609375, "loss": 0.092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2803161144256592, "rewards/margins": 2.7963624000549316, "rewards/rejected": -2.5160465240478516, "step": 18600 }, { "epoch": 0.74, "eval_logits/chosen": -3.070911169052124, "eval_logits/rejected": -3.096160650253296, "eval_logps/chosen": -0.17436599731445312, "eval_logps/rejected": -284.3739013671875, "eval_loss": 0.060356296598911285, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148558735847473, "eval_rewards/margins": 2.798438787460327, "eval_rewards/rejected": -2.4835829734802246, "eval_runtime": 2.5484, "eval_samples_per_second": 1.962, "eval_steps_per_second": 0.392, "step": 18600 }, { "epoch": 0.74, "learning_rate": 9.307715967732492e-07, "logits/chosen": -3.0438826084136963, "logits/rejected": -3.070777177810669, "logps/chosen": -0.24305739998817444, "logps/rejected": -291.41094970703125, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.31629669666290283, "rewards/margins": 2.8715338706970215, "rewards/rejected": -2.55523681640625, "step": 18610 }, { "epoch": 0.74, "learning_rate": 9.280557812487187e-07, "logits/chosen": -3.0168261528015137, "logits/rejected": -3.0444254875183105, "logps/chosen": -7.509366512298584, "logps/rejected": -278.47515869140625, "loss": 0.1282, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24152138829231262, "rewards/margins": 2.6710000038146973, "rewards/rejected": -2.429478883743286, "step": 18620 }, { "epoch": 0.75, "learning_rate": 9.253430303098782e-07, "logits/chosen": -3.012111186981201, "logits/rejected": -3.03979229927063, "logps/chosen": -0.2799631953239441, "logps/rejected": -289.1927185058594, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153725862503052, "rewards/margins": 2.8487892150878906, "rewards/rejected": -2.533416748046875, "step": 18630 }, { "epoch": 0.75, "learning_rate": 9.226333492453759e-07, "logits/chosen": -3.0017170906066895, "logits/rejected": -3.0295848846435547, "logps/chosen": -9.786956787109375, "logps/rejected": -280.81787109375, "loss": 0.1495, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21933194994926453, "rewards/margins": 2.676030397415161, "rewards/rejected": -2.456698417663574, "step": 18640 }, { "epoch": 0.75, "learning_rate": 9.199267433378728e-07, "logits/chosen": -3.0061707496643066, "logits/rejected": -3.0340659618377686, "logps/chosen": -5.6395158767700195, "logps/rejected": -283.0304260253906, "loss": 0.1103, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2625441253185272, "rewards/margins": 2.7372701168060303, "rewards/rejected": -2.4747262001037598, "step": 18650 }, { "epoch": 0.75, "learning_rate": 9.172232178640361e-07, "logits/chosen": -3.036374568939209, "logits/rejected": -3.0665271282196045, "logps/chosen": -3.332537889480591, "logps/rejected": -288.48443603515625, "loss": 0.0836, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2862139642238617, "rewards/margins": 2.813042163848877, "rewards/rejected": -2.5268282890319824, "step": 18660 }, { "epoch": 0.75, "learning_rate": 9.145227780945265e-07, "logits/chosen": -3.03861403465271, "logits/rejected": -3.0659983158111572, "logps/chosen": -3.735335111618042, "logps/rejected": -289.59588623046875, "loss": 0.0895, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.282238632440567, "rewards/margins": 2.8181004524230957, "rewards/rejected": -2.5358614921569824, "step": 18670 }, { "epoch": 0.75, "learning_rate": 9.118254292939891e-07, "logits/chosen": -3.032975435256958, "logits/rejected": -3.0619072914123535, "logps/chosen": -0.18356963992118835, "logps/rejected": -291.015869140625, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3131125271320343, "rewards/margins": 2.871932029724121, "rewards/rejected": -2.558819532394409, "step": 18680 }, { "epoch": 0.75, "learning_rate": 9.091311767210453e-07, "logits/chosen": -2.991305351257324, "logits/rejected": -3.0221822261810303, "logps/chosen": -6.587327003479004, "logps/rejected": -282.13482666015625, "loss": 0.1203, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2515459358692169, "rewards/margins": 2.714376211166382, "rewards/rejected": -2.462830066680908, "step": 18690 }, { "epoch": 0.75, "learning_rate": 9.064400256282757e-07, "logits/chosen": -3.00541353225708, "logits/rejected": -3.0343194007873535, "logps/chosen": -0.6361985206604004, "logps/rejected": -286.84796142578125, "loss": 0.0641, "rewards/accuracies": 1.0, "rewards/chosen": 0.3091517388820648, "rewards/margins": 2.8242878913879395, "rewards/rejected": -2.515136241912842, "step": 18700 }, { "epoch": 0.75, "eval_logits/chosen": -3.0704238414764404, "eval_logits/rejected": -3.0953433513641357, "eval_logps/chosen": -0.16414450109004974, "eval_logps/rejected": -283.9635925292969, "eval_loss": 0.06066160276532173, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149580955505371, "eval_rewards/margins": 2.794438123703003, "eval_rewards/rejected": -2.479480028152466, "eval_runtime": 2.54, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 18700 }, { "epoch": 0.75, "learning_rate": 9.037519812622195e-07, "logits/chosen": -3.039065361022949, "logits/rejected": -3.0658764839172363, "logps/chosen": -6.799346923828125, "logps/rejected": -285.26690673828125, "loss": 0.1202, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2492431402206421, "rewards/margins": 2.7478411197662354, "rewards/rejected": -2.4985976219177246, "step": 18710 }, { "epoch": 0.75, "learning_rate": 9.010670488633552e-07, "logits/chosen": -3.0205435752868652, "logits/rejected": -3.0486044883728027, "logps/chosen": -3.920691967010498, "logps/rejected": -286.024169921875, "loss": 0.0931, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27909237146377563, "rewards/margins": 2.7810089588165283, "rewards/rejected": -2.5019166469573975, "step": 18720 }, { "epoch": 0.75, "learning_rate": 8.983852336660959e-07, "logits/chosen": -3.0284838676452637, "logits/rejected": -3.0566134452819824, "logps/chosen": -3.805457353591919, "logps/rejected": -288.379638671875, "loss": 0.0902, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2790758013725281, "rewards/margins": 2.805764675140381, "rewards/rejected": -2.526688814163208, "step": 18730 }, { "epoch": 0.75, "learning_rate": 8.957065408987797e-07, "logits/chosen": -3.016465902328491, "logits/rejected": -3.045963764190674, "logps/chosen": -3.110877513885498, "logps/rejected": -285.05487060546875, "loss": 0.0848, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28577297925949097, "rewards/margins": 2.7883219718933105, "rewards/rejected": -2.502548933029175, "step": 18740 }, { "epoch": 0.75, "learning_rate": 8.930309757836517e-07, "logits/chosen": -3.0008602142333984, "logits/rejected": -3.0318069458007812, "logps/chosen": -3.9094016551971436, "logps/rejected": -284.2802734375, "loss": 0.0945, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27446702122688293, "rewards/margins": 2.7637431621551514, "rewards/rejected": -2.48927640914917, "step": 18750 }, { "epoch": 0.75, "learning_rate": 8.903585435368658e-07, "logits/chosen": -3.047288417816162, "logits/rejected": -3.0747718811035156, "logps/chosen": -0.3137054741382599, "logps/rejected": -292.49993896484375, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.31361618638038635, "rewards/margins": 2.88680362701416, "rewards/rejected": -2.5731873512268066, "step": 18760 }, { "epoch": 0.75, "learning_rate": 8.876892493684644e-07, "logits/chosen": -3.0097544193267822, "logits/rejected": -3.04282808303833, "logps/chosen": -3.3052573204040527, "logps/rejected": -288.8502197265625, "loss": 0.0853, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28385820984840393, "rewards/margins": 2.8201889991760254, "rewards/rejected": -2.5363311767578125, "step": 18770 }, { "epoch": 0.75, "learning_rate": 8.850230984823735e-07, "logits/chosen": -3.0465939044952393, "logits/rejected": -3.076566219329834, "logps/chosen": -3.2420902252197266, "logps/rejected": -289.2339172363281, "loss": 0.0848, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2870084047317505, "rewards/margins": 2.8184094429016113, "rewards/rejected": -2.5314011573791504, "step": 18780 }, { "epoch": 0.75, "learning_rate": 8.823600960763901e-07, "logits/chosen": -3.032721996307373, "logits/rejected": -3.061018228530884, "logps/chosen": -6.265414237976074, "logps/rejected": -283.2023620605469, "loss": 0.1175, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25303179025650024, "rewards/margins": 2.7309353351593018, "rewards/rejected": -2.477903366088867, "step": 18790 }, { "epoch": 0.75, "learning_rate": 8.797002473421729e-07, "logits/chosen": -2.9891388416290283, "logits/rejected": -3.0224950313568115, "logps/chosen": -0.39449748396873474, "logps/rejected": -288.1334533691406, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.308954119682312, "rewards/margins": 2.8435251712799072, "rewards/rejected": -2.5345711708068848, "step": 18800 }, { "epoch": 0.75, "eval_logits/chosen": -3.0707573890686035, "eval_logits/rejected": -3.0961222648620605, "eval_logps/chosen": -0.18601302802562714, "eval_logps/rejected": -283.98553466796875, "eval_loss": 0.06067793443799019, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147394061088562, "eval_rewards/margins": 2.7944388389587402, "eval_rewards/rejected": -2.4796996116638184, "eval_runtime": 2.5361, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 18800 }, { "epoch": 0.75, "learning_rate": 8.77043557465235e-07, "logits/chosen": -2.9836766719818115, "logits/rejected": -3.0148584842681885, "logps/chosen": -2.261561632156372, "logps/rejected": -288.42901611328125, "loss": 0.0664, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28997117280960083, "rewards/margins": 2.8207716941833496, "rewards/rejected": -2.5308005809783936, "step": 18810 }, { "epoch": 0.75, "learning_rate": 8.743900316249273e-07, "logits/chosen": -3.0297112464904785, "logits/rejected": -3.0592620372772217, "logps/chosen": -3.974339246749878, "logps/rejected": -285.3243103027344, "loss": 0.094, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27816203236579895, "rewards/margins": 2.7743172645568848, "rewards/rejected": -2.496155261993408, "step": 18820 }, { "epoch": 0.75, "learning_rate": 8.717396749944373e-07, "logits/chosen": -3.021521806716919, "logits/rejected": -3.0496084690093994, "logps/chosen": -5.066006660461426, "logps/rejected": -285.44537353515625, "loss": 0.0966, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2683029770851135, "rewards/margins": 2.7608847618103027, "rewards/rejected": -2.492581844329834, "step": 18830 }, { "epoch": 0.75, "learning_rate": 8.69092492740768e-07, "logits/chosen": -3.0249953269958496, "logits/rejected": -3.0506443977355957, "logps/chosen": -2.9168031215667725, "logps/rejected": -286.1826477050781, "loss": 0.082, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28806471824645996, "rewards/margins": 2.792505979537964, "rewards/rejected": -2.504441022872925, "step": 18840 }, { "epoch": 0.75, "learning_rate": 8.664484900247363e-07, "logits/chosen": -3.0387425422668457, "logits/rejected": -3.0688140392303467, "logps/chosen": -0.2976202368736267, "logps/rejected": -289.4935302734375, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138512670993805, "rewards/margins": 2.8486313819885254, "rewards/rejected": -2.5347800254821777, "step": 18850 }, { "epoch": 0.75, "learning_rate": 8.63807672000963e-07, "logits/chosen": -3.023127317428589, "logits/rejected": -3.0532965660095215, "logps/chosen": -7.412040710449219, "logps/rejected": -284.6882629394531, "loss": 0.1266, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2448488175868988, "rewards/margins": 2.7347359657287598, "rewards/rejected": -2.48988676071167, "step": 18860 }, { "epoch": 0.75, "learning_rate": 8.611700438178572e-07, "logits/chosen": -3.0336368083953857, "logits/rejected": -3.0615713596343994, "logps/chosen": -0.24159438908100128, "logps/rejected": -290.3606262207031, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148675560951233, "rewards/margins": 2.862546443939209, "rewards/rejected": -2.5476791858673096, "step": 18870 }, { "epoch": 0.76, "learning_rate": 8.585356106176093e-07, "logits/chosen": -3.001439332962036, "logits/rejected": -3.032996892929077, "logps/chosen": -3.8420403003692627, "logps/rejected": -289.38983154296875, "loss": 0.0905, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27820128202438354, "rewards/margins": 2.8173861503601074, "rewards/rejected": -2.539185047149658, "step": 18880 }, { "epoch": 0.76, "learning_rate": 8.559043775361816e-07, "logits/chosen": -3.032400131225586, "logits/rejected": -3.060129404067993, "logps/chosen": -4.106351852416992, "logps/rejected": -286.14300537109375, "loss": 0.0931, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.276129812002182, "rewards/margins": 2.781954288482666, "rewards/rejected": -2.505824565887451, "step": 18890 }, { "epoch": 0.76, "learning_rate": 8.532763497032987e-07, "logits/chosen": -3.002535104751587, "logits/rejected": -3.0308682918548584, "logps/chosen": -0.18519434332847595, "logps/rejected": -293.31756591796875, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/chosen": 0.3146587014198303, "rewards/margins": 2.8967807292938232, "rewards/rejected": -2.582122325897217, "step": 18900 }, { "epoch": 0.76, "eval_logits/chosen": -3.0708272457122803, "eval_logits/rejected": -3.096036195755005, "eval_logps/chosen": -0.16044631600379944, "eval_logps/rejected": -284.034912109375, "eval_loss": 0.060582101345062256, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31499508023262024, "eval_rewards/margins": 2.7951884269714355, "eval_rewards/rejected": -2.4801933765411377, "eval_runtime": 2.5376, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 18900 }, { "epoch": 0.76, "learning_rate": 8.506515322424349e-07, "logits/chosen": -3.0149035453796387, "logits/rejected": -3.0436153411865234, "logps/chosen": -0.24759690463542938, "logps/rejected": -289.1720886230469, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.3160659372806549, "rewards/margins": 2.851628541946411, "rewards/rejected": -2.53556227684021, "step": 18910 }, { "epoch": 0.76, "learning_rate": 8.480299302708059e-07, "logits/chosen": -2.9710586071014404, "logits/rejected": -3.005065679550171, "logps/chosen": -7.22275447845459, "logps/rejected": -282.3023681640625, "loss": 0.1283, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24252519011497498, "rewards/margins": 2.7131409645080566, "rewards/rejected": -2.4706156253814697, "step": 18920 }, { "epoch": 0.76, "learning_rate": 8.454115488993592e-07, "logits/chosen": -3.00288724899292, "logits/rejected": -3.0320286750793457, "logps/chosen": -4.850283145904541, "logps/rejected": -283.57586669921875, "loss": 0.0967, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2667330205440521, "rewards/margins": 2.752349853515625, "rewards/rejected": -2.485616445541382, "step": 18930 }, { "epoch": 0.76, "learning_rate": 8.427963932327621e-07, "logits/chosen": -2.9959640502929688, "logits/rejected": -3.0251564979553223, "logps/chosen": -3.751883029937744, "logps/rejected": -285.6430358886719, "loss": 0.0904, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2782065272331238, "rewards/margins": 2.779829740524292, "rewards/rejected": -2.5016233921051025, "step": 18940 }, { "epoch": 0.76, "learning_rate": 8.40184468369396e-07, "logits/chosen": -3.0050313472747803, "logits/rejected": -3.0378270149230957, "logps/chosen": -0.2315187007188797, "logps/rejected": -289.9661560058594, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31340938806533813, "rewards/margins": 2.8569769859313965, "rewards/rejected": -2.543567657470703, "step": 18950 }, { "epoch": 0.76, "learning_rate": 8.375757794013414e-07, "logits/chosen": -3.017937183380127, "logits/rejected": -3.0492520332336426, "logps/chosen": -6.3270063400268555, "logps/rejected": -281.894287109375, "loss": 0.1172, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25128307938575745, "rewards/margins": 2.721052646636963, "rewards/rejected": -2.4697694778442383, "step": 18960 }, { "epoch": 0.76, "learning_rate": 8.349703314143712e-07, "logits/chosen": -3.0119407176971436, "logits/rejected": -3.0414814949035645, "logps/chosen": -0.16983668506145477, "logps/rejected": -292.83038330078125, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.31483787298202515, "rewards/margins": 2.8885550498962402, "rewards/rejected": -2.5737175941467285, "step": 18970 }, { "epoch": 0.76, "learning_rate": 8.323681294879393e-07, "logits/chosen": -3.0007059574127197, "logits/rejected": -3.0322418212890625, "logps/chosen": -0.17807292938232422, "logps/rejected": -291.26019287109375, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.3135971426963806, "rewards/margins": 2.8745155334472656, "rewards/rejected": -2.5609183311462402, "step": 18980 }, { "epoch": 0.76, "learning_rate": 8.297691786951706e-07, "logits/chosen": -3.0112102031707764, "logits/rejected": -3.0407214164733887, "logps/chosen": -5.342525482177734, "logps/rejected": -287.40704345703125, "loss": 0.0944, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2660371959209442, "rewards/margins": 2.7807745933532715, "rewards/rejected": -2.514737367630005, "step": 18990 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -3.003174304962158, "logits/rejected": -3.032320022583008, "logps/chosen": -4.859917640686035, "logps/rejected": -286.9779968261719, "loss": 0.0935, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2682982087135315, "rewards/margins": 2.790523052215576, "rewards/rejected": -2.5222249031066895, "step": 19000 }, { "epoch": 0.76, "eval_logits/chosen": -3.071140766143799, "eval_logits/rejected": -3.0963523387908936, "eval_logps/chosen": -0.22732511162757874, "eval_logps/rejected": -284.07672119140625, "eval_loss": 0.060619186609983444, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31432628631591797, "eval_rewards/margins": 2.7949378490448, "eval_rewards/rejected": -2.480611562728882, "eval_runtime": 2.5334, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.395, "step": 19000 }, { "epoch": 0.76, "learning_rate": 8.245810507714294e-07, "logits/chosen": -3.038635730743408, "logits/rejected": -3.0679352283477783, "logps/chosen": -6.772637844085693, "logps/rejected": -282.73223876953125, "loss": 0.1212, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24932897090911865, "rewards/margins": 2.7238991260528564, "rewards/rejected": -2.4745702743530273, "step": 19010 }, { "epoch": 0.76, "learning_rate": 8.21991883754977e-07, "logits/chosen": -3.035594940185547, "logits/rejected": -3.0645172595977783, "logps/chosen": -0.1718825399875641, "logps/rejected": -290.28857421875, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.31495150923728943, "rewards/margins": 2.866070032119751, "rewards/rejected": -2.5511183738708496, "step": 19020 }, { "epoch": 0.76, "learning_rate": 8.194059881012107e-07, "logits/chosen": -2.9997246265411377, "logits/rejected": -3.0315589904785156, "logps/chosen": -0.19906330108642578, "logps/rejected": -291.93450927734375, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.31121039390563965, "rewards/margins": 2.8772401809692383, "rewards/rejected": -2.5660300254821777, "step": 19030 }, { "epoch": 0.76, "learning_rate": 8.168233688514654e-07, "logits/chosen": -3.0372824668884277, "logits/rejected": -3.0631799697875977, "logps/chosen": -3.418600559234619, "logps/rejected": -284.49774169921875, "loss": 0.0675, "rewards/accuracies": 1.0, "rewards/chosen": 0.28293806314468384, "rewards/margins": 2.774747848510742, "rewards/rejected": -2.491809606552124, "step": 19040 }, { "epoch": 0.76, "learning_rate": 8.142440310406923e-07, "logits/chosen": -3.015806198120117, "logits/rejected": -3.043550968170166, "logps/chosen": -3.8745524883270264, "logps/rejected": -288.7944641113281, "loss": 0.0912, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2798644006252289, "rewards/margins": 2.811394214630127, "rewards/rejected": -2.531529664993286, "step": 19050 }, { "epoch": 0.76, "learning_rate": 8.116679796974389e-07, "logits/chosen": -2.9986684322357178, "logits/rejected": -3.027263641357422, "logps/chosen": -9.757802963256836, "logps/rejected": -276.6990966796875, "loss": 0.1524, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22001151740550995, "rewards/margins": 2.631702423095703, "rewards/rejected": -2.4116907119750977, "step": 19060 }, { "epoch": 0.76, "learning_rate": 8.090952198438521e-07, "logits/chosen": -3.038231611251831, "logits/rejected": -3.066168785095215, "logps/chosen": -3.1209545135498047, "logps/rejected": -286.39849853515625, "loss": 0.0857, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2894969582557678, "rewards/margins": 2.794008731842041, "rewards/rejected": -2.504511594772339, "step": 19070 }, { "epoch": 0.76, "learning_rate": 8.065257564956572e-07, "logits/chosen": -3.0414822101593018, "logits/rejected": -3.071106195449829, "logps/chosen": -0.24063511192798615, "logps/rejected": -292.5636291503906, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 0.3194201588630676, "rewards/margins": 2.885437250137329, "rewards/rejected": -2.566016912460327, "step": 19080 }, { "epoch": 0.76, "learning_rate": 8.039595946621551e-07, "logits/chosen": -3.0268654823303223, "logits/rejected": -3.0559823513031006, "logps/chosen": -2.553745746612549, "logps/rejected": -285.33905029296875, "loss": 0.0806, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29137280583381653, "rewards/margins": 2.792853832244873, "rewards/rejected": -2.501481056213379, "step": 19090 }, { "epoch": 0.76, "learning_rate": 8.013967393462094e-07, "logits/chosen": -3.0210421085357666, "logits/rejected": -3.0508499145507812, "logps/chosen": -5.260729789733887, "logps/rejected": -287.06658935546875, "loss": 0.0887, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26307862997055054, "rewards/margins": 2.7809224128723145, "rewards/rejected": -2.517843723297119, "step": 19100 }, { "epoch": 0.76, "eval_logits/chosen": -3.0696728229522705, "eval_logits/rejected": -3.0951218605041504, "eval_logps/chosen": -0.16488638520240784, "eval_logps/rejected": -284.1393737792969, "eval_loss": 0.060529015958309174, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149506449699402, "eval_rewards/margins": 2.7961888313293457, "eval_rewards/rejected": -2.4812381267547607, "eval_runtime": 2.5432, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 19100 }, { "epoch": 0.76, "learning_rate": 7.98837195544237e-07, "logits/chosen": -3.040004253387451, "logits/rejected": -3.071357011795044, "logps/chosen": -4.89766788482666, "logps/rejected": -285.02154541015625, "loss": 0.1034, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26746922731399536, "rewards/margins": 2.7673230171203613, "rewards/rejected": -2.4998536109924316, "step": 19110 }, { "epoch": 0.76, "learning_rate": 7.962809682462008e-07, "logits/chosen": -2.994990110397339, "logits/rejected": -3.024095058441162, "logps/chosen": -3.8251595497131348, "logps/rejected": -289.4325256347656, "loss": 0.0898, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27813026309013367, "rewards/margins": 2.8199918270111084, "rewards/rejected": -2.5418617725372314, "step": 19120 }, { "epoch": 0.77, "learning_rate": 7.937280624355955e-07, "logits/chosen": -3.0231776237487793, "logits/rejected": -3.052773952484131, "logps/chosen": -0.20076970756053925, "logps/rejected": -293.7613220214844, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": 0.3173568844795227, "rewards/margins": 2.896378517150879, "rewards/rejected": -2.579021453857422, "step": 19130 }, { "epoch": 0.77, "learning_rate": 7.91178483089444e-07, "logits/chosen": -3.01094126701355, "logits/rejected": -3.039828062057495, "logps/chosen": -3.6272404193878174, "logps/rejected": -285.46185302734375, "loss": 0.0912, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28026068210601807, "rewards/margins": 2.780122756958008, "rewards/rejected": -2.4998621940612793, "step": 19140 }, { "epoch": 0.77, "learning_rate": 7.886322351782782e-07, "logits/chosen": -3.0184566974639893, "logits/rejected": -3.048978090286255, "logps/chosen": -0.18552078306674957, "logps/rejected": -291.78662109375, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.31488609313964844, "rewards/margins": 2.874918222427368, "rewards/rejected": -2.5600318908691406, "step": 19150 }, { "epoch": 0.77, "learning_rate": 7.860893236661412e-07, "logits/chosen": -3.0318851470947266, "logits/rejected": -3.0615735054016113, "logps/chosen": -3.7053444385528564, "logps/rejected": -284.8966979980469, "loss": 0.0954, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28117498755455017, "rewards/margins": 2.780956506729126, "rewards/rejected": -2.4997811317443848, "step": 19160 }, { "epoch": 0.77, "learning_rate": 7.835497535105685e-07, "logits/chosen": -3.0172600746154785, "logits/rejected": -3.04665470123291, "logps/chosen": -2.603533983230591, "logps/rejected": -288.7715148925781, "loss": 0.0788, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.292306125164032, "rewards/margins": 2.8218576908111572, "rewards/rejected": -2.5295515060424805, "step": 19170 }, { "epoch": 0.77, "learning_rate": 7.810135296625817e-07, "logits/chosen": -3.00954008102417, "logits/rejected": -3.040489912033081, "logps/chosen": -0.20650739967823029, "logps/rejected": -290.0669860839844, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3155820369720459, "rewards/margins": 2.862065076828003, "rewards/rejected": -2.546483039855957, "step": 19180 }, { "epoch": 0.77, "learning_rate": 7.784806570666795e-07, "logits/chosen": -3.0267202854156494, "logits/rejected": -3.0582308769226074, "logps/chosen": -3.2721810340881348, "logps/rejected": -284.2300720214844, "loss": 0.0876, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2823317348957062, "rewards/margins": 2.776235580444336, "rewards/rejected": -2.493903636932373, "step": 19190 }, { "epoch": 0.77, "learning_rate": 7.759511406608255e-07, "logits/chosen": -3.032541275024414, "logits/rejected": -3.06109881401062, "logps/chosen": -7.491339683532715, "logps/rejected": -282.91546630859375, "loss": 0.1274, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24096408486366272, "rewards/margins": 2.717383861541748, "rewards/rejected": -2.4764199256896973, "step": 19200 }, { "epoch": 0.77, "eval_logits/chosen": -3.070648431777954, "eval_logits/rejected": -3.096090793609619, "eval_logps/chosen": -0.21667519211769104, "eval_logps/rejected": -284.2411193847656, "eval_loss": 0.06050436571240425, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3144327700138092, "eval_rewards/margins": 2.7966885566711426, "eval_rewards/rejected": -2.482255697250366, "eval_runtime": 2.5427, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 19200 }, { "epoch": 0.77, "learning_rate": 7.734249853764428e-07, "logits/chosen": -3.030012607574463, "logits/rejected": -3.056885242462158, "logps/chosen": -3.8869755268096924, "logps/rejected": -287.9263916015625, "loss": 0.0911, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2771347165107727, "rewards/margins": 2.8062362670898438, "rewards/rejected": -2.529101610183716, "step": 19210 }, { "epoch": 0.77, "learning_rate": 7.709021961384e-07, "logits/chosen": -3.0038607120513916, "logits/rejected": -3.03153657913208, "logps/chosen": -6.067347526550293, "logps/rejected": -283.784912109375, "loss": 0.1138, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25689810514450073, "rewards/margins": 2.7385077476501465, "rewards/rejected": -2.481609344482422, "step": 19220 }, { "epoch": 0.77, "learning_rate": 7.683827778650033e-07, "logits/chosen": -3.031817674636841, "logits/rejected": -3.0576558113098145, "logps/chosen": -10.447847366333008, "logps/rejected": -278.87237548828125, "loss": 0.1584, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2125459611415863, "rewards/margins": 2.6518313884735107, "rewards/rejected": -2.4392852783203125, "step": 19230 }, { "epoch": 0.77, "learning_rate": 7.65866735467988e-07, "logits/chosen": -3.003892421722412, "logits/rejected": -3.0346226692199707, "logps/chosen": -7.306876182556152, "logps/rejected": -284.90484619140625, "loss": 0.126, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24236884713172913, "rewards/margins": 2.7383813858032227, "rewards/rejected": -2.4960126876831055, "step": 19240 }, { "epoch": 0.77, "learning_rate": 7.633540738525066e-07, "logits/chosen": -3.0146379470825195, "logits/rejected": -3.045022487640381, "logps/chosen": -0.19108964502811432, "logps/rejected": -292.854736328125, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 0.3125861585140228, "rewards/margins": 2.885240077972412, "rewards/rejected": -2.5726540088653564, "step": 19250 }, { "epoch": 0.77, "learning_rate": 7.60844797917123e-07, "logits/chosen": -3.003319501876831, "logits/rejected": -3.031503677368164, "logps/chosen": -6.339613437652588, "logps/rejected": -284.799560546875, "loss": 0.1138, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25141093134880066, "rewards/margins": 2.749905824661255, "rewards/rejected": -2.498494863510132, "step": 19260 }, { "epoch": 0.77, "learning_rate": 7.583389125537982e-07, "logits/chosen": -3.0353636741638184, "logits/rejected": -3.063471794128418, "logps/chosen": -5.95803165435791, "logps/rejected": -280.5540466308594, "loss": 0.1149, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2572571635246277, "rewards/margins": 2.7099671363830566, "rewards/rejected": -2.452709674835205, "step": 19270 }, { "epoch": 0.77, "learning_rate": 7.558364226478843e-07, "logits/chosen": -2.999953508377075, "logits/rejected": -3.0288949012756348, "logps/chosen": -9.51017951965332, "logps/rejected": -281.57073974609375, "loss": 0.149, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22200345993041992, "rewards/margins": 2.684455156326294, "rewards/rejected": -2.462451934814453, "step": 19280 }, { "epoch": 0.77, "learning_rate": 7.533373330781127e-07, "logits/chosen": -3.043966770172119, "logits/rejected": -3.0679917335510254, "logps/chosen": -12.256208419799805, "logps/rejected": -277.8588562011719, "loss": 0.1623, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.19588904082775116, "rewards/margins": 2.618486166000366, "rewards/rejected": -2.4225971698760986, "step": 19290 }, { "epoch": 0.77, "learning_rate": 7.508416487165862e-07, "logits/chosen": -3.04948091506958, "logits/rejected": -3.0751259326934814, "logps/chosen": -7.398470878601074, "logps/rejected": -280.14776611328125, "loss": 0.1333, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24284644424915314, "rewards/margins": 2.688143014907837, "rewards/rejected": -2.4452965259552, "step": 19300 }, { "epoch": 0.77, "eval_logits/chosen": -3.0695247650146484, "eval_logits/rejected": -3.0948867797851562, "eval_logps/chosen": -0.1556398570537567, "eval_logps/rejected": -284.33013916015625, "eval_loss": 0.06036083772778511, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150431513786316, "eval_rewards/margins": 2.7981886863708496, "eval_rewards/rejected": -2.4831454753875732, "eval_runtime": 2.5328, "eval_samples_per_second": 1.974, "eval_steps_per_second": 0.395, "step": 19300 }, { "epoch": 0.77, "learning_rate": 7.483493744287715e-07, "logits/chosen": -3.019261360168457, "logits/rejected": -3.0494813919067383, "logps/chosen": -4.270129203796387, "logps/rejected": -286.02691650390625, "loss": 0.0978, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2744823396205902, "rewards/margins": 2.778333902359009, "rewards/rejected": -2.5038516521453857, "step": 19310 }, { "epoch": 0.77, "learning_rate": 7.458605150734815e-07, "logits/chosen": -3.033679723739624, "logits/rejected": -3.0625007152557373, "logps/chosen": -5.96636438369751, "logps/rejected": -284.78863525390625, "loss": 0.1109, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2566654086112976, "rewards/margins": 2.752500295639038, "rewards/rejected": -2.4958348274230957, "step": 19320 }, { "epoch": 0.77, "learning_rate": 7.433750755028774e-07, "logits/chosen": -3.0298924446105957, "logits/rejected": -3.0592522621154785, "logps/chosen": -3.0099148750305176, "logps/rejected": -285.34039306640625, "loss": 0.0848, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28780072927474976, "rewards/margins": 2.7864723205566406, "rewards/rejected": -2.498671770095825, "step": 19330 }, { "epoch": 0.77, "learning_rate": 7.408930605624498e-07, "logits/chosen": -3.0251519680023193, "logits/rejected": -3.055079936981201, "logps/chosen": -6.0731048583984375, "logps/rejected": -282.5919189453125, "loss": 0.1121, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2551670968532562, "rewards/margins": 2.729923725128174, "rewards/rejected": -2.4747567176818848, "step": 19340 }, { "epoch": 0.77, "learning_rate": 7.384144750910133e-07, "logits/chosen": -3.0131192207336426, "logits/rejected": -3.0424904823303223, "logps/chosen": -6.792653560638428, "logps/rejected": -279.2401428222656, "loss": 0.1283, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25075846910476685, "rewards/margins": 2.687711238861084, "rewards/rejected": -2.436952590942383, "step": 19350 }, { "epoch": 0.77, "learning_rate": 7.359393239206991e-07, "logits/chosen": -3.0084643363952637, "logits/rejected": -3.038665294647217, "logps/chosen": -0.17132681608200073, "logps/rejected": -290.525146484375, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3144919276237488, "rewards/margins": 2.8668606281280518, "rewards/rejected": -2.5523688793182373, "step": 19360 }, { "epoch": 0.77, "learning_rate": 7.334676118769382e-07, "logits/chosen": -2.978264093399048, "logits/rejected": -3.012434482574463, "logps/chosen": -2.5257954597473145, "logps/rejected": -290.0606689453125, "loss": 0.0695, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2871394753456116, "rewards/margins": 2.8363819122314453, "rewards/rejected": -2.5492424964904785, "step": 19370 }, { "epoch": 0.78, "learning_rate": 7.309993437784624e-07, "logits/chosen": -3.0011069774627686, "logits/rejected": -3.0316736698150635, "logps/chosen": -4.0543975830078125, "logps/rejected": -283.8179626464844, "loss": 0.0947, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27538785338401794, "rewards/margins": 2.7611141204833984, "rewards/rejected": -2.4857265949249268, "step": 19380 }, { "epoch": 0.78, "learning_rate": 7.285345244372843e-07, "logits/chosen": -3.0521082878112793, "logits/rejected": -3.08099627494812, "logps/chosen": -0.20678183436393738, "logps/rejected": -291.715576171875, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152766227722168, "rewards/margins": 2.8779876232147217, "rewards/rejected": -2.562711238861084, "step": 19390 }, { "epoch": 0.78, "learning_rate": 7.260731586586983e-07, "logits/chosen": -3.01775860786438, "logits/rejected": -3.048677921295166, "logps/chosen": -0.19528500735759735, "logps/rejected": -291.565185546875, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.31699252128601074, "rewards/margins": 2.876326322555542, "rewards/rejected": -2.5593342781066895, "step": 19400 }, { "epoch": 0.78, "eval_logits/chosen": -3.0712594985961914, "eval_logits/rejected": -3.0966739654541016, "eval_logps/chosen": -0.17772848904132843, "eval_logps/rejected": -284.4021911621094, "eval_loss": 0.06033550947904587, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.314822256565094, "eval_rewards/margins": 2.7986884117126465, "eval_rewards/rejected": -2.4838664531707764, "eval_runtime": 2.5417, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 19400 }, { "epoch": 0.78, "learning_rate": 7.236152512412594e-07, "logits/chosen": -2.9996790885925293, "logits/rejected": -3.029147148132324, "logps/chosen": -6.839443206787109, "logps/rejected": -282.0938720703125, "loss": 0.1221, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24656438827514648, "rewards/margins": 2.7132394313812256, "rewards/rejected": -2.466675281524658, "step": 19410 }, { "epoch": 0.78, "learning_rate": 7.211608069767867e-07, "logits/chosen": -3.037236452102661, "logits/rejected": -3.0653538703918457, "logps/chosen": -3.3888771533966064, "logps/rejected": -285.71551513671875, "loss": 0.0844, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2831563353538513, "rewards/margins": 2.7872109413146973, "rewards/rejected": -2.504054546356201, "step": 19420 }, { "epoch": 0.78, "learning_rate": 7.187098306503437e-07, "logits/chosen": -3.007848024368286, "logits/rejected": -3.0367941856384277, "logps/chosen": -9.73828411102295, "logps/rejected": -277.9251403808594, "loss": 0.1521, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21984358131885529, "rewards/margins": 2.6431498527526855, "rewards/rejected": -2.4233059883117676, "step": 19430 }, { "epoch": 0.78, "learning_rate": 7.162623270402336e-07, "logits/chosen": -2.9992830753326416, "logits/rejected": -3.0307202339172363, "logps/chosen": -4.547159194946289, "logps/rejected": -285.3299865722656, "loss": 0.0955, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2725488245487213, "rewards/margins": 2.7697694301605225, "rewards/rejected": -2.497220516204834, "step": 19440 }, { "epoch": 0.78, "learning_rate": 7.138183009179922e-07, "logits/chosen": -3.0481817722320557, "logits/rejected": -3.0786380767822266, "logps/chosen": -3.60003662109375, "logps/rejected": -284.45648193359375, "loss": 0.0896, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28138023614883423, "rewards/margins": 2.772547483444214, "rewards/rejected": -2.4911673069000244, "step": 19450 }, { "epoch": 0.78, "learning_rate": 7.113777570483701e-07, "logits/chosen": -3.0485525131225586, "logits/rejected": -3.0760855674743652, "logps/chosen": -3.1638407707214355, "logps/rejected": -286.2895202636719, "loss": 0.0732, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28197699785232544, "rewards/margins": 2.7886221408843994, "rewards/rejected": -2.5066449642181396, "step": 19460 }, { "epoch": 0.78, "learning_rate": 7.089407001893353e-07, "logits/chosen": -3.02719783782959, "logits/rejected": -3.0571396350860596, "logps/chosen": -0.20493373274803162, "logps/rejected": -292.67486572265625, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 0.31670287251472473, "rewards/margins": 2.8842620849609375, "rewards/rejected": -2.5675594806671143, "step": 19470 }, { "epoch": 0.78, "learning_rate": 7.065071350920538e-07, "logits/chosen": -3.017014503479004, "logits/rejected": -3.0446059703826904, "logps/chosen": -7.295994758605957, "logps/rejected": -282.2385559082031, "loss": 0.1264, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24330198764801025, "rewards/margins": 2.7138147354125977, "rewards/rejected": -2.470512866973877, "step": 19480 }, { "epoch": 0.78, "learning_rate": 7.040770665008853e-07, "logits/chosen": -3.0194058418273926, "logits/rejected": -3.0493650436401367, "logps/chosen": -9.656015396118164, "logps/rejected": -278.1210021972656, "loss": 0.151, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21805472671985626, "rewards/margins": 2.6474709510803223, "rewards/rejected": -2.4294159412384033, "step": 19490 }, { "epoch": 0.78, "learning_rate": 7.016504991533727e-07, "logits/chosen": -3.0157063007354736, "logits/rejected": -3.0449233055114746, "logps/chosen": -0.2550104260444641, "logps/rejected": -288.2318420410156, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": 0.31409263610839844, "rewards/margins": 2.84096622467041, "rewards/rejected": -2.52687406539917, "step": 19500 }, { "epoch": 0.78, "eval_logits/chosen": -3.0711324214935303, "eval_logits/rejected": -3.0963714122772217, "eval_logps/chosen": -0.18266554176807404, "eval_logps/rejected": -284.2572021484375, "eval_loss": 0.060413651168346405, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31477290391921997, "eval_rewards/margins": 2.7971889972686768, "eval_rewards/rejected": -2.4824159145355225, "eval_runtime": 2.5399, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 19500 }, { "epoch": 0.78, "learning_rate": 6.992274377802328e-07, "logits/chosen": -3.0034689903259277, "logits/rejected": -3.035383462905884, "logps/chosen": -2.088667392730713, "logps/rejected": -285.22137451171875, "loss": 0.0736, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2960374057292938, "rewards/margins": 2.7903242111206055, "rewards/rejected": -2.49428653717041, "step": 19510 }, { "epoch": 0.78, "learning_rate": 6.968078871053488e-07, "logits/chosen": -3.049133777618408, "logits/rejected": -3.0765886306762695, "logps/chosen": -0.734638512134552, "logps/rejected": -289.50982666015625, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/chosen": 0.3107915222644806, "rewards/margins": 2.8441054821014404, "rewards/rejected": -2.5333142280578613, "step": 19520 }, { "epoch": 0.78, "learning_rate": 6.943918518457584e-07, "logits/chosen": -3.0040783882141113, "logits/rejected": -3.0354886054992676, "logps/chosen": -1.0694907903671265, "logps/rejected": -290.8699645996094, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.30737119913101196, "rewards/margins": 2.8629777431488037, "rewards/rejected": -2.5556068420410156, "step": 19530 }, { "epoch": 0.78, "learning_rate": 6.919793367116453e-07, "logits/chosen": -3.0135574340820312, "logits/rejected": -3.0425260066986084, "logps/chosen": -0.1842934787273407, "logps/rejected": -292.3150634765625, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.31540435552597046, "rewards/margins": 2.8829760551452637, "rewards/rejected": -2.5675718784332275, "step": 19540 }, { "epoch": 0.78, "learning_rate": 6.895703464063319e-07, "logits/chosen": -2.9973692893981934, "logits/rejected": -3.0292372703552246, "logps/chosen": -3.3585877418518066, "logps/rejected": -284.1430358886719, "loss": 0.0898, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28282660245895386, "rewards/margins": 2.767232656478882, "rewards/rejected": -2.484405994415283, "step": 19550 }, { "epoch": 0.78, "learning_rate": 6.871648856262667e-07, "logits/chosen": -2.9894096851348877, "logits/rejected": -3.0205702781677246, "logps/chosen": -0.7215459942817688, "logps/rejected": -289.1903991699219, "loss": 0.0632, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30837777256965637, "rewards/margins": 2.8465428352355957, "rewards/rejected": -2.5381648540496826, "step": 19560 }, { "epoch": 0.78, "learning_rate": 6.847629590610202e-07, "logits/chosen": -3.0029990673065186, "logits/rejected": -3.0326669216156006, "logps/chosen": -5.541040420532227, "logps/rejected": -284.2546081542969, "loss": 0.1103, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2610812485218048, "rewards/margins": 2.750392198562622, "rewards/rejected": -2.4893107414245605, "step": 19570 }, { "epoch": 0.78, "learning_rate": 6.823645713932709e-07, "logits/chosen": -3.028764486312866, "logits/rejected": -3.0556387901306152, "logps/chosen": -5.850397109985352, "logps/rejected": -284.7899169921875, "loss": 0.1112, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2602009177207947, "rewards/margins": 2.7513017654418945, "rewards/rejected": -2.491101026535034, "step": 19580 }, { "epoch": 0.78, "learning_rate": 6.799697272987976e-07, "logits/chosen": -3.0348072052001953, "logits/rejected": -3.062267780303955, "logps/chosen": -0.1970411092042923, "logps/rejected": -290.2965087890625, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.3168058395385742, "rewards/margins": 2.8645119667053223, "rewards/rejected": -2.547706127166748, "step": 19590 }, { "epoch": 0.78, "learning_rate": 6.775784314464717e-07, "logits/chosen": -3.013582229614258, "logits/rejected": -3.04215407371521, "logps/chosen": -0.4425079822540283, "logps/rejected": -288.3848876953125, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": 0.31329643726348877, "rewards/margins": 2.8360722064971924, "rewards/rejected": -2.522775650024414, "step": 19600 }, { "epoch": 0.78, "eval_logits/chosen": -3.070716619491577, "eval_logits/rejected": -3.0966689586639404, "eval_logps/chosen": -0.17977547645568848, "eval_logps/rejected": -284.3291931152344, "eval_loss": 0.06036672741174698, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31480178236961365, "eval_rewards/margins": 2.7979378700256348, "eval_rewards/rejected": -2.4831361770629883, "eval_runtime": 2.5396, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 19600 }, { "epoch": 0.78, "learning_rate": 6.751906884982462e-07, "logits/chosen": -3.0212371349334717, "logits/rejected": -3.0495433807373047, "logps/chosen": -0.41158947348594666, "logps/rejected": -291.61663818359375, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.31455090641975403, "rewards/margins": 2.873993158340454, "rewards/rejected": -2.5594420433044434, "step": 19610 }, { "epoch": 0.78, "learning_rate": 6.728065031091502e-07, "logits/chosen": -2.999938488006592, "logits/rejected": -3.0328547954559326, "logps/chosen": -2.8029346466064453, "logps/rejected": -289.14013671875, "loss": 0.079, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28753918409347534, "rewards/margins": 2.8266472816467285, "rewards/rejected": -2.5391077995300293, "step": 19620 }, { "epoch": 0.79, "learning_rate": 6.704258799272723e-07, "logits/chosen": -3.066432476043701, "logits/rejected": -3.0921578407287598, "logps/chosen": -3.905014753341675, "logps/rejected": -288.2741394042969, "loss": 0.0914, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2816407084465027, "rewards/margins": 2.803579807281494, "rewards/rejected": -2.5219390392303467, "step": 19630 }, { "epoch": 0.79, "learning_rate": 6.680488235937613e-07, "logits/chosen": -3.0026535987854004, "logits/rejected": -3.0324349403381348, "logps/chosen": -3.7623214721679688, "logps/rejected": -284.41802978515625, "loss": 0.0928, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28099867701530457, "rewards/margins": 2.766923427581787, "rewards/rejected": -2.48592472076416, "step": 19640 }, { "epoch": 0.79, "learning_rate": 6.656753387428089e-07, "logits/chosen": -3.0043509006500244, "logits/rejected": -3.0338852405548096, "logps/chosen": -3.8840813636779785, "logps/rejected": -286.656982421875, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2784481644630432, "rewards/margins": 2.788675308227539, "rewards/rejected": -2.5102272033691406, "step": 19650 }, { "epoch": 0.79, "learning_rate": 6.633054300016464e-07, "logits/chosen": -3.009127616882324, "logits/rejected": -3.0395655632019043, "logps/chosen": -0.9213047027587891, "logps/rejected": -287.53741455078125, "loss": 0.0649, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30721646547317505, "rewards/margins": 2.8262839317321777, "rewards/rejected": -2.5190672874450684, "step": 19660 }, { "epoch": 0.79, "learning_rate": 6.609391019905317e-07, "logits/chosen": -3.032832622528076, "logits/rejected": -3.0623295307159424, "logps/chosen": -2.726358413696289, "logps/rejected": -288.4491882324219, "loss": 0.0712, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29179537296295166, "rewards/margins": 2.8186421394348145, "rewards/rejected": -2.526846408843994, "step": 19670 }, { "epoch": 0.79, "learning_rate": 6.58576359322742e-07, "logits/chosen": -3.01442813873291, "logits/rejected": -3.043365955352783, "logps/chosen": -3.8828747272491455, "logps/rejected": -288.00982666015625, "loss": 0.0919, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2773120403289795, "rewards/margins": 2.801997423171997, "rewards/rejected": -2.5246851444244385, "step": 19680 }, { "epoch": 0.79, "learning_rate": 6.562172066045655e-07, "logits/chosen": -3.0121653079986572, "logits/rejected": -3.0390806198120117, "logps/chosen": -3.805459976196289, "logps/rejected": -285.467041015625, "loss": 0.0914, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2795736789703369, "rewards/margins": 2.777099609375, "rewards/rejected": -2.497525691986084, "step": 19690 }, { "epoch": 0.79, "learning_rate": 6.538616484352902e-07, "logits/chosen": -3.037684679031372, "logits/rejected": -3.0637309551239014, "logps/chosen": -3.411588191986084, "logps/rejected": -287.9814453125, "loss": 0.0869, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28188472986221313, "rewards/margins": 2.811868667602539, "rewards/rejected": -2.5299839973449707, "step": 19700 }, { "epoch": 0.79, "eval_logits/chosen": -3.070810079574585, "eval_logits/rejected": -3.096153974533081, "eval_logps/chosen": -0.20417603850364685, "eval_logps/rejected": -284.42864990234375, "eval_loss": 0.06031927466392517, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3145577609539032, "eval_rewards/margins": 2.7986881732940674, "eval_rewards/rejected": -2.484130382537842, "eval_runtime": 2.5405, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 19700 }, { "epoch": 0.79, "learning_rate": 6.515096894071996e-07, "logits/chosen": -3.0103185176849365, "logits/rejected": -3.0413098335266113, "logps/chosen": -5.353012561798096, "logps/rejected": -284.72735595703125, "loss": 0.0989, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2648283839225769, "rewards/margins": 2.756556749343872, "rewards/rejected": -2.4917280673980713, "step": 19710 }, { "epoch": 0.79, "learning_rate": 6.491613341055547e-07, "logits/chosen": -3.013698101043701, "logits/rejected": -3.0439820289611816, "logps/chosen": -0.18908004462718964, "logps/rejected": -290.0416564941406, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148341774940491, "rewards/margins": 2.859623432159424, "rewards/rejected": -2.5447895526885986, "step": 19720 }, { "epoch": 0.79, "learning_rate": 6.468165871085972e-07, "logits/chosen": -3.023343563079834, "logits/rejected": -3.0514774322509766, "logps/chosen": -4.1349358558654785, "logps/rejected": -283.4425964355469, "loss": 0.0955, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2733152508735657, "rewards/margins": 2.754042148590088, "rewards/rejected": -2.480726718902588, "step": 19730 }, { "epoch": 0.79, "learning_rate": 6.444754529875302e-07, "logits/chosen": -3.0411338806152344, "logits/rejected": -3.067823886871338, "logps/chosen": -7.052037239074707, "logps/rejected": -280.13006591796875, "loss": 0.1301, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24701769649982452, "rewards/margins": 2.692673444747925, "rewards/rejected": -2.4456558227539062, "step": 19740 }, { "epoch": 0.79, "learning_rate": 6.421379363065142e-07, "logits/chosen": -3.0267093181610107, "logits/rejected": -3.0577683448791504, "logps/chosen": -0.6335383653640747, "logps/rejected": -289.441162109375, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": 0.31047117710113525, "rewards/margins": 2.8519110679626465, "rewards/rejected": -2.541440010070801, "step": 19750 }, { "epoch": 0.79, "learning_rate": 6.398040416226592e-07, "logits/chosen": -3.00809907913208, "logits/rejected": -3.0351574420928955, "logps/chosen": -10.991167068481445, "logps/rejected": -277.4362487792969, "loss": 0.1492, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2042854130268097, "rewards/margins": 2.6308541297912598, "rewards/rejected": -2.4265685081481934, "step": 19760 }, { "epoch": 0.79, "learning_rate": 6.374737734860098e-07, "logits/chosen": -3.003129482269287, "logits/rejected": -3.0352885723114014, "logps/chosen": -3.835862398147583, "logps/rejected": -287.695556640625, "loss": 0.0918, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27856510877609253, "rewards/margins": 2.7960286140441895, "rewards/rejected": -2.517463207244873, "step": 19770 }, { "epoch": 0.79, "learning_rate": 6.351471364395448e-07, "logits/chosen": -3.0333049297332764, "logits/rejected": -3.062656879425049, "logps/chosen": -7.522751808166504, "logps/rejected": -284.1300354003906, "loss": 0.1274, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24165014922618866, "rewards/margins": 2.7317066192626953, "rewards/rejected": -2.4900565147399902, "step": 19780 }, { "epoch": 0.79, "learning_rate": 6.328241350191619e-07, "logits/chosen": -2.998920440673828, "logits/rejected": -3.0301759243011475, "logps/chosen": -0.24845945835113525, "logps/rejected": -291.2838134765625, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31633245944976807, "rewards/margins": 2.871067523956299, "rewards/rejected": -2.5547351837158203, "step": 19790 }, { "epoch": 0.79, "learning_rate": 6.305047737536707e-07, "logits/chosen": -3.0296897888183594, "logits/rejected": -3.058006763458252, "logps/chosen": -1.9250990152359009, "logps/rejected": -288.8249816894531, "loss": 0.0724, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29969847202301025, "rewards/margins": 2.829899787902832, "rewards/rejected": -2.5302014350891113, "step": 19800 }, { "epoch": 0.79, "eval_logits/chosen": -3.0711796283721924, "eval_logits/rejected": -3.095463752746582, "eval_logps/chosen": -0.15390324592590332, "eval_logps/rejected": -284.4533996582031, "eval_loss": 0.060272254049777985, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31506049633026123, "eval_rewards/margins": 2.7994384765625, "eval_rewards/rejected": -2.484377861022949, "eval_runtime": 2.5288, "eval_samples_per_second": 1.977, "eval_steps_per_second": 0.395, "step": 19800 }, { "epoch": 0.79, "learning_rate": 6.281890571647853e-07, "logits/chosen": -3.011249542236328, "logits/rejected": -3.0411860942840576, "logps/chosen": -4.173542499542236, "logps/rejected": -279.75469970703125, "loss": 0.1021, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2723159193992615, "rewards/margins": 2.7218003273010254, "rewards/rejected": -2.449484348297119, "step": 19810 }, { "epoch": 0.79, "learning_rate": 6.258769897671124e-07, "logits/chosen": -3.029383420944214, "logits/rejected": -3.0568432807922363, "logps/chosen": -3.079216241836548, "logps/rejected": -288.0819396972656, "loss": 0.0846, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2843305468559265, "rewards/margins": 2.8138139247894287, "rewards/rejected": -2.5294835567474365, "step": 19820 }, { "epoch": 0.79, "learning_rate": 6.235685760681473e-07, "logits/chosen": -2.988381862640381, "logits/rejected": -3.0182037353515625, "logps/chosen": -6.74566125869751, "logps/rejected": -284.5984191894531, "loss": 0.1208, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24865181744098663, "rewards/margins": 2.7419028282165527, "rewards/rejected": -2.493250608444214, "step": 19830 }, { "epoch": 0.79, "learning_rate": 6.2126382056826e-07, "logits/chosen": -3.0190224647521973, "logits/rejected": -3.045597553253174, "logps/chosen": -0.30804935097694397, "logps/rejected": -291.08642578125, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.312460333108902, "rewards/margins": 2.8683300018310547, "rewards/rejected": -2.5558695793151855, "step": 19840 }, { "epoch": 0.79, "learning_rate": 6.189627277606894e-07, "logits/chosen": -3.002560615539551, "logits/rejected": -3.0321247577667236, "logps/chosen": -5.564377784729004, "logps/rejected": -283.7138671875, "loss": 0.11, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26065748929977417, "rewards/margins": 2.7419114112854004, "rewards/rejected": -2.4812536239624023, "step": 19850 }, { "epoch": 0.79, "learning_rate": 6.166653021315336e-07, "logits/chosen": -3.00692081451416, "logits/rejected": -3.033396005630493, "logps/chosen": -6.225968360900879, "logps/rejected": -282.6068420410156, "loss": 0.1172, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2564888894557953, "rewards/margins": 2.725921869277954, "rewards/rejected": -2.469433307647705, "step": 19860 }, { "epoch": 0.79, "learning_rate": 6.143715481597404e-07, "logits/chosen": -3.0119595527648926, "logits/rejected": -3.042956829071045, "logps/chosen": -3.44915771484375, "logps/rejected": -285.7361755371094, "loss": 0.0892, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2828792929649353, "rewards/margins": 2.7863550186157227, "rewards/rejected": -2.5034756660461426, "step": 19870 }, { "epoch": 0.8, "learning_rate": 6.120814703171024e-07, "logits/chosen": -3.003459930419922, "logits/rejected": -3.033344030380249, "logps/chosen": -0.24893376231193542, "logps/rejected": -291.38214111328125, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.3133556544780731, "rewards/margins": 2.870776653289795, "rewards/rejected": -2.5574212074279785, "step": 19880 }, { "epoch": 0.8, "learning_rate": 6.097950730682426e-07, "logits/chosen": -3.010993719100952, "logits/rejected": -3.0430448055267334, "logps/chosen": -0.23643477261066437, "logps/rejected": -292.19378662109375, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.31364360451698303, "rewards/margins": 2.8851380348205566, "rewards/rejected": -2.5714943408966064, "step": 19890 }, { "epoch": 0.8, "learning_rate": 6.075123608706093e-07, "logits/chosen": -3.0177159309387207, "logits/rejected": -3.047261953353882, "logps/chosen": -5.262398719787598, "logps/rejected": -285.3663635253906, "loss": 0.1063, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26657453179359436, "rewards/margins": 2.7630181312561035, "rewards/rejected": -2.496443510055542, "step": 19900 }, { "epoch": 0.8, "eval_logits/chosen": -3.0701839923858643, "eval_logits/rejected": -3.0964746475219727, "eval_logps/chosen": -0.16716483235359192, "eval_logps/rejected": -284.31658935546875, "eval_loss": 0.06039834022521973, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.314927875995636, "eval_rewards/margins": 2.7979378700256348, "eval_rewards/rejected": -2.4830100536346436, "eval_runtime": 2.5373, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 19900 }, { "epoch": 0.8, "learning_rate": 6.052333381744663e-07, "logits/chosen": -2.993241310119629, "logits/rejected": -3.0270278453826904, "logps/chosen": -0.22008498013019562, "logps/rejected": -291.61810302734375, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3124344050884247, "rewards/margins": 2.8761541843414307, "rewards/rejected": -2.563720226287842, "step": 19910 }, { "epoch": 0.8, "learning_rate": 6.029580094228862e-07, "logits/chosen": -3.0156359672546387, "logits/rejected": -3.0444445610046387, "logps/chosen": -0.1851811558008194, "logps/rejected": -289.9269104003906, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3167978525161743, "rewards/margins": 2.859539747238159, "rewards/rejected": -2.5427420139312744, "step": 19920 }, { "epoch": 0.8, "learning_rate": 6.006863790517392e-07, "logits/chosen": -2.9962940216064453, "logits/rejected": -3.029555559158325, "logps/chosen": -0.24946144223213196, "logps/rejected": -288.95452880859375, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": 0.3155350089073181, "rewards/margins": 2.853261947631836, "rewards/rejected": -2.5377273559570312, "step": 19930 }, { "epoch": 0.8, "learning_rate": 5.984184514896821e-07, "logits/chosen": -3.020761489868164, "logits/rejected": -3.053316116333008, "logps/chosen": -3.6812338829040527, "logps/rejected": -287.9261779785156, "loss": 0.0903, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28011947870254517, "rewards/margins": 2.803401470184326, "rewards/rejected": -2.523282289505005, "step": 19940 }, { "epoch": 0.8, "learning_rate": 5.961542311581586e-07, "logits/chosen": -3.0125067234039307, "logits/rejected": -3.0439400672912598, "logps/chosen": -0.15059874951839447, "logps/rejected": -291.6501159667969, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.3165060579776764, "rewards/margins": 2.879068374633789, "rewards/rejected": -2.5625624656677246, "step": 19950 }, { "epoch": 0.8, "learning_rate": 5.9389372247138e-07, "logits/chosen": -3.0233936309814453, "logits/rejected": -3.050651788711548, "logps/chosen": -1.3525137901306152, "logps/rejected": -290.2934875488281, "loss": 0.0675, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3054468035697937, "rewards/margins": 2.852842092514038, "rewards/rejected": -2.5473954677581787, "step": 19960 }, { "epoch": 0.8, "learning_rate": 5.916369298363259e-07, "logits/chosen": -3.016831874847412, "logits/rejected": -3.047943353652954, "logps/chosen": -0.19998005032539368, "logps/rejected": -291.17596435546875, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.31595176458358765, "rewards/margins": 2.87250018119812, "rewards/rejected": -2.556548595428467, "step": 19970 }, { "epoch": 0.8, "learning_rate": 5.893838576527275e-07, "logits/chosen": -3.017730951309204, "logits/rejected": -3.0497162342071533, "logps/chosen": -2.0412611961364746, "logps/rejected": -288.44512939453125, "loss": 0.0723, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2956765294075012, "rewards/margins": 2.823387861251831, "rewards/rejected": -2.5277113914489746, "step": 19980 }, { "epoch": 0.8, "learning_rate": 5.871345103130646e-07, "logits/chosen": -3.0243210792541504, "logits/rejected": -3.0516390800476074, "logps/chosen": -10.857800483703613, "logps/rejected": -279.39007568359375, "loss": 0.1626, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.20661461353302002, "rewards/margins": 2.648256540298462, "rewards/rejected": -2.4416420459747314, "step": 19990 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -3.0194687843322754, "logits/rejected": -3.0494701862335205, "logps/chosen": -3.1715784072875977, "logps/rejected": -286.93145751953125, "loss": 0.0857, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28375330567359924, "rewards/margins": 2.8003811836242676, "rewards/rejected": -2.516627550125122, "step": 20000 }, { "epoch": 0.8, "eval_logits/chosen": -3.070155620574951, "eval_logits/rejected": -3.0957961082458496, "eval_logps/chosen": -0.173853799700737, "eval_logps/rejected": -284.44830322265625, "eval_loss": 0.06029314920306206, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148609697818756, "eval_rewards/margins": 2.7991881370544434, "eval_rewards/rejected": -2.4843268394470215, "eval_runtime": 2.5368, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 20000 }, { "epoch": 0.8, "learning_rate": 5.826470076991459e-07, "logits/chosen": -3.044435977935791, "logits/rejected": -3.071894645690918, "logps/chosen": -0.20759257674217224, "logps/rejected": -292.6017150878906, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.31696629524230957, "rewards/margins": 2.8837008476257324, "rewards/rejected": -2.566734790802002, "step": 20010 }, { "epoch": 0.8, "learning_rate": 5.80408861173507e-07, "logits/chosen": -3.007974147796631, "logits/rejected": -3.0359365940093994, "logps/chosen": -6.250969409942627, "logps/rejected": -283.09210205078125, "loss": 0.1162, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2557337284088135, "rewards/margins": 2.7339534759521484, "rewards/rejected": -2.478219509124756, "step": 20020 }, { "epoch": 0.8, "learning_rate": 5.781744569890172e-07, "logits/chosen": -3.024116039276123, "logits/rejected": -3.052543878555298, "logps/chosen": -3.601020336151123, "logps/rejected": -286.54510498046875, "loss": 0.0899, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28208452463150024, "rewards/margins": 2.790964126586914, "rewards/rejected": -2.5088791847229004, "step": 20030 }, { "epoch": 0.8, "learning_rate": 5.759437995017639e-07, "logits/chosen": -3.031480550765991, "logits/rejected": -3.0569980144500732, "logps/chosen": -7.298519134521484, "logps/rejected": -282.9543762207031, "loss": 0.1254, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24498474597930908, "rewards/margins": 2.719371795654297, "rewards/rejected": -2.474386692047119, "step": 20040 }, { "epoch": 0.8, "learning_rate": 5.737168930605272e-07, "logits/chosen": -3.018972158432007, "logits/rejected": -3.0474650859832764, "logps/chosen": -0.1756066232919693, "logps/rejected": -287.8888854980469, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": 0.31397828459739685, "rewards/margins": 2.8347604274749756, "rewards/rejected": -2.520782470703125, "step": 20050 }, { "epoch": 0.8, "learning_rate": 5.714937420067746e-07, "logits/chosen": -3.0164060592651367, "logits/rejected": -3.043306350708008, "logps/chosen": -0.17872276902198792, "logps/rejected": -290.0773010253906, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.31633260846138, "rewards/margins": 2.8613991737365723, "rewards/rejected": -2.5450661182403564, "step": 20060 }, { "epoch": 0.8, "learning_rate": 5.692743506746548e-07, "logits/chosen": -2.9999687671661377, "logits/rejected": -3.0319573879241943, "logps/chosen": -0.46644172072410583, "logps/rejected": -289.97271728515625, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": 0.3098914921283722, "rewards/margins": 2.8600258827209473, "rewards/rejected": -2.5501346588134766, "step": 20070 }, { "epoch": 0.8, "learning_rate": 5.670587233909819e-07, "logits/chosen": -3.0151543617248535, "logits/rejected": -3.0450682640075684, "logps/chosen": -2.7390406131744385, "logps/rejected": -286.8623046875, "loss": 0.0801, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.288101464509964, "rewards/margins": 2.804985523223877, "rewards/rejected": -2.5168840885162354, "step": 20080 }, { "epoch": 0.8, "learning_rate": 5.64846864475237e-07, "logits/chosen": -3.016693592071533, "logits/rejected": -3.046828508377075, "logps/chosen": -0.2709905207157135, "logps/rejected": -290.1189270019531, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.31444281339645386, "rewards/margins": 2.8672895431518555, "rewards/rejected": -2.552846670150757, "step": 20090 }, { "epoch": 0.8, "learning_rate": 5.626387782395512e-07, "logits/chosen": -3.0342299938201904, "logits/rejected": -3.062554359436035, "logps/chosen": -0.19469378888607025, "logps/rejected": -290.37542724609375, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.31669363379478455, "rewards/margins": 2.8637547492980957, "rewards/rejected": -2.5470612049102783, "step": 20100 }, { "epoch": 0.8, "eval_logits/chosen": -3.070676803588867, "eval_logits/rejected": -3.096409797668457, "eval_logps/chosen": -0.20689566433429718, "eval_logps/rejected": -284.50634765625, "eval_loss": 0.060256510972976685, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31453055143356323, "eval_rewards/margins": 2.799438238143921, "eval_rewards/rejected": -2.484907627105713, "eval_runtime": 2.5406, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 20100 }, { "epoch": 0.8, "learning_rate": 5.60434468988702e-07, "logits/chosen": -3.012647867202759, "logits/rejected": -3.042332649230957, "logps/chosen": -3.068351984024048, "logps/rejected": -288.8038330078125, "loss": 0.0838, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2872264087200165, "rewards/margins": 2.8189239501953125, "rewards/rejected": -2.5316975116729736, "step": 20110 }, { "epoch": 0.8, "learning_rate": 5.582339410201029e-07, "logits/chosen": -3.0170822143554688, "logits/rejected": -3.0466108322143555, "logps/chosen": -7.63440465927124, "logps/rejected": -280.49285888671875, "loss": 0.1319, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24369969964027405, "rewards/margins": 2.6937546730041504, "rewards/rejected": -2.4500551223754883, "step": 20120 }, { "epoch": 0.81, "learning_rate": 5.56037198623795e-07, "logits/chosen": -3.016305446624756, "logits/rejected": -3.044900894165039, "logps/chosen": -3.419347047805786, "logps/rejected": -286.3233642578125, "loss": 0.0878, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28299883008003235, "rewards/margins": 2.7910754680633545, "rewards/rejected": -2.5080769062042236, "step": 20130 }, { "epoch": 0.81, "learning_rate": 5.538442460824417e-07, "logits/chosen": -3.0105252265930176, "logits/rejected": -3.0392253398895264, "logps/chosen": -0.1985289305448532, "logps/rejected": -289.9409484863281, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.31390661001205444, "rewards/margins": 2.8592276573181152, "rewards/rejected": -2.545321226119995, "step": 20140 }, { "epoch": 0.81, "learning_rate": 5.516550876713142e-07, "logits/chosen": -3.002223253250122, "logits/rejected": -3.030656337738037, "logps/chosen": -2.7929203510284424, "logps/rejected": -288.55902099609375, "loss": 0.0802, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2911677956581116, "rewards/margins": 2.8186287879943848, "rewards/rejected": -2.527460813522339, "step": 20150 }, { "epoch": 0.81, "learning_rate": 5.494697276582916e-07, "logits/chosen": -3.0183353424072266, "logits/rejected": -3.0478992462158203, "logps/chosen": -3.837584972381592, "logps/rejected": -288.8074951171875, "loss": 0.0912, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2784840166568756, "rewards/margins": 2.811950206756592, "rewards/rejected": -2.533466339111328, "step": 20160 }, { "epoch": 0.81, "learning_rate": 5.472881703038418e-07, "logits/chosen": -2.993647575378418, "logits/rejected": -3.025439739227295, "logps/chosen": -0.371376097202301, "logps/rejected": -291.11395263671875, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152356743812561, "rewards/margins": 2.8690667152404785, "rewards/rejected": -2.553830623626709, "step": 20170 }, { "epoch": 0.81, "learning_rate": 5.45110419861025e-07, "logits/chosen": -3.0053508281707764, "logits/rejected": -3.033374309539795, "logps/chosen": -0.2212844341993332, "logps/rejected": -290.9537048339844, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3134203255176544, "rewards/margins": 2.8724327087402344, "rewards/rejected": -2.559011936187744, "step": 20180 }, { "epoch": 0.81, "learning_rate": 5.429364805754758e-07, "logits/chosen": -3.035491943359375, "logits/rejected": -3.065528154373169, "logps/chosen": -0.18313749134540558, "logps/rejected": -291.1951904296875, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3156503438949585, "rewards/margins": 2.8680849075317383, "rewards/rejected": -2.5524344444274902, "step": 20190 }, { "epoch": 0.81, "learning_rate": 5.407663566854008e-07, "logits/chosen": -3.00537371635437, "logits/rejected": -3.0357577800750732, "logps/chosen": -0.21702273190021515, "logps/rejected": -290.86492919921875, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.31437134742736816, "rewards/margins": 2.8694710731506348, "rewards/rejected": -2.5550999641418457, "step": 20200 }, { "epoch": 0.81, "eval_logits/chosen": -3.069638252258301, "eval_logits/rejected": -3.0954854488372803, "eval_logps/chosen": -0.17296424508094788, "eval_logps/rejected": -284.2973937988281, "eval_loss": 0.060371674597263336, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31486988067626953, "eval_rewards/margins": 2.7976880073547363, "eval_rewards/rejected": -2.482818126678467, "eval_runtime": 2.5316, "eval_samples_per_second": 1.975, "eval_steps_per_second": 0.395, "step": 20200 }, { "epoch": 0.81, "learning_rate": 5.38600052421567e-07, "logits/chosen": -3.0093019008636475, "logits/rejected": -3.038151979446411, "logps/chosen": -1.029259204864502, "logps/rejected": -288.16461181640625, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": 0.3056420385837555, "rewards/margins": 2.834432363510132, "rewards/rejected": -2.5287904739379883, "step": 20210 }, { "epoch": 0.81, "learning_rate": 5.364375720072954e-07, "logits/chosen": -2.9872772693634033, "logits/rejected": -3.016976833343506, "logps/chosen": -7.871077537536621, "logps/rejected": -280.1622009277344, "loss": 0.1327, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.236453577876091, "rewards/margins": 2.6873795986175537, "rewards/rejected": -2.4509263038635254, "step": 20220 }, { "epoch": 0.81, "learning_rate": 5.342789196584527e-07, "logits/chosen": -3.0155491828918457, "logits/rejected": -3.0440421104431152, "logps/chosen": -0.23745529353618622, "logps/rejected": -291.79644775390625, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3151070475578308, "rewards/margins": 2.877214193344116, "rewards/rejected": -2.5621070861816406, "step": 20230 }, { "epoch": 0.81, "learning_rate": 5.32124099583442e-07, "logits/chosen": -3.0263473987579346, "logits/rejected": -3.058356523513794, "logps/chosen": -0.2513282001018524, "logps/rejected": -292.0820617675781, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3161300718784332, "rewards/margins": 2.8811347484588623, "rewards/rejected": -2.565004825592041, "step": 20240 }, { "epoch": 0.81, "learning_rate": 5.299731159831953e-07, "logits/chosen": -3.041964054107666, "logits/rejected": -3.0687689781188965, "logps/chosen": -7.098435878753662, "logps/rejected": -284.9207458496094, "loss": 0.1231, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24774949252605438, "rewards/margins": 2.741699695587158, "rewards/rejected": -2.4939496517181396, "step": 20250 }, { "epoch": 0.81, "learning_rate": 5.278259730511651e-07, "logits/chosen": -3.0150365829467773, "logits/rejected": -3.0451173782348633, "logps/chosen": -1.7806068658828735, "logps/rejected": -289.20355224609375, "loss": 0.0651, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29987961053848267, "rewards/margins": 2.8374743461608887, "rewards/rejected": -2.537594795227051, "step": 20260 }, { "epoch": 0.81, "learning_rate": 5.256826749733158e-07, "logits/chosen": -3.026967763900757, "logits/rejected": -3.056293487548828, "logps/chosen": -3.072582721710205, "logps/rejected": -287.00189208984375, "loss": 0.0849, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2873064875602722, "rewards/margins": 2.8015146255493164, "rewards/rejected": -2.5142083168029785, "step": 20270 }, { "epoch": 0.81, "learning_rate": 5.235432259281175e-07, "logits/chosen": -3.0157980918884277, "logits/rejected": -3.041632890701294, "logps/chosen": -9.417409896850586, "logps/rejected": -282.0577697753906, "loss": 0.1449, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22469159960746765, "rewards/margins": 2.689847946166992, "rewards/rejected": -2.465156078338623, "step": 20280 }, { "epoch": 0.81, "learning_rate": 5.214076300865359e-07, "logits/chosen": -3.0033981800079346, "logits/rejected": -3.0327136516571045, "logps/chosen": -10.577092170715332, "logps/rejected": -278.00811767578125, "loss": 0.1572, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21453270316123962, "rewards/margins": 2.638232946395874, "rewards/rejected": -2.4237000942230225, "step": 20290 }, { "epoch": 0.81, "learning_rate": 5.192758916120236e-07, "logits/chosen": -3.000728130340576, "logits/rejected": -3.0303940773010254, "logps/chosen": -3.8058857917785645, "logps/rejected": -287.65228271484375, "loss": 0.091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2776055932044983, "rewards/margins": 2.8059751987457275, "rewards/rejected": -2.528369426727295, "step": 20300 }, { "epoch": 0.81, "eval_logits/chosen": -3.068936586380005, "eval_logits/rejected": -3.094954013824463, "eval_logps/chosen": -0.1502102166414261, "eval_logps/rejected": -284.4747009277344, "eval_loss": 0.06025205925107002, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150974214076996, "eval_rewards/margins": 2.7996888160705566, "eval_rewards/rejected": -2.484591245651245, "eval_runtime": 2.5391, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 20300 }, { "epoch": 0.81, "learning_rate": 5.171480146605141e-07, "logits/chosen": -3.002833127975464, "logits/rejected": -3.0316803455352783, "logps/chosen": -0.2635360360145569, "logps/rejected": -289.9896545410156, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31404754519462585, "rewards/margins": 2.860442876815796, "rewards/rejected": -2.5463950634002686, "step": 20310 }, { "epoch": 0.81, "learning_rate": 5.150240033804116e-07, "logits/chosen": -3.0372605323791504, "logits/rejected": -3.064877986907959, "logps/chosen": -0.18896515667438507, "logps/rejected": -292.3150939941406, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.31676357984542847, "rewards/margins": 2.882986068725586, "rewards/rejected": -2.5662224292755127, "step": 20320 }, { "epoch": 0.81, "learning_rate": 5.129038619125868e-07, "logits/chosen": -3.007368564605713, "logits/rejected": -3.0405704975128174, "logps/chosen": -0.21957719326019287, "logps/rejected": -290.7877502441406, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152260482311249, "rewards/margins": 2.8667781352996826, "rewards/rejected": -2.5515522956848145, "step": 20330 }, { "epoch": 0.81, "learning_rate": 5.107875943903614e-07, "logits/chosen": -3.0186150074005127, "logits/rejected": -3.048957586288452, "logps/chosen": -7.543633460998535, "logps/rejected": -281.05535888671875, "loss": 0.1296, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2432563751935959, "rewards/margins": 2.69765043258667, "rewards/rejected": -2.4543943405151367, "step": 20340 }, { "epoch": 0.81, "learning_rate": 5.086752049395094e-07, "logits/chosen": -3.0137176513671875, "logits/rejected": -3.041348934173584, "logps/chosen": -7.4382805824279785, "logps/rejected": -283.9596252441406, "loss": 0.1271, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24315595626831055, "rewards/margins": 2.729368209838867, "rewards/rejected": -2.4862122535705566, "step": 20350 }, { "epoch": 0.81, "learning_rate": 5.065666976782412e-07, "logits/chosen": -3.023681163787842, "logits/rejected": -3.0527231693267822, "logps/chosen": -0.1724010407924652, "logps/rejected": -292.8829040527344, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.3141844868659973, "rewards/margins": 2.8886213302612305, "rewards/rejected": -2.574436664581299, "step": 20360 }, { "epoch": 0.81, "learning_rate": 5.044620767171993e-07, "logits/chosen": -3.0223240852355957, "logits/rejected": -3.0527801513671875, "logps/chosen": -3.120891571044922, "logps/rejected": -287.0079650878906, "loss": 0.0844, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2827572822570801, "rewards/margins": 2.8036794662475586, "rewards/rejected": -2.5209224224090576, "step": 20370 }, { "epoch": 0.82, "learning_rate": 5.023613461594512e-07, "logits/chosen": -3.0265629291534424, "logits/rejected": -3.057248115539551, "logps/chosen": -0.17381809651851654, "logps/rejected": -291.55462646484375, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152366578578949, "rewards/margins": 2.87650465965271, "rewards/rejected": -2.561267852783203, "step": 20380 }, { "epoch": 0.82, "learning_rate": 5.002645101004766e-07, "logits/chosen": -3.006131649017334, "logits/rejected": -3.03682804107666, "logps/chosen": -0.4144471287727356, "logps/rejected": -291.099365234375, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3105317950248718, "rewards/margins": 2.8654961585998535, "rewards/rejected": -2.554964303970337, "step": 20390 }, { "epoch": 0.82, "learning_rate": 4.981715726281666e-07, "logits/chosen": -3.0134143829345703, "logits/rejected": -3.045480489730835, "logps/chosen": -3.6893444061279297, "logps/rejected": -288.8420715332031, "loss": 0.0899, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28178101778030396, "rewards/margins": 2.812875509262085, "rewards/rejected": -2.5310940742492676, "step": 20400 }, { "epoch": 0.82, "eval_logits/chosen": -3.069953203201294, "eval_logits/rejected": -3.0963666439056396, "eval_logps/chosen": -0.19005133211612701, "eval_logps/rejected": -284.48956298828125, "eval_loss": 0.060257066041231155, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3146990239620209, "eval_rewards/margins": 2.799438953399658, "eval_rewards/rejected": -2.4847397804260254, "eval_runtime": 2.5356, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 20400 }, { "epoch": 0.82, "learning_rate": 4.960825378228082e-07, "logits/chosen": -2.9894676208496094, "logits/rejected": -3.0200295448303223, "logps/chosen": -2.820568323135376, "logps/rejected": -285.35040283203125, "loss": 0.0839, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2875911593437195, "rewards/margins": 2.790719509124756, "rewards/rejected": -2.5031285285949707, "step": 20410 }, { "epoch": 0.82, "learning_rate": 4.939974097570841e-07, "logits/chosen": -3.016232490539551, "logits/rejected": -3.0467336177825928, "logps/chosen": -3.200312852859497, "logps/rejected": -286.37445068359375, "loss": 0.0844, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2854054272174835, "rewards/margins": 2.793417453765869, "rewards/rejected": -2.508012294769287, "step": 20420 }, { "epoch": 0.82, "learning_rate": 4.919161924960561e-07, "logits/chosen": -3.0412089824676514, "logits/rejected": -3.068444013595581, "logps/chosen": -5.800314903259277, "logps/rejected": -282.8283996582031, "loss": 0.1117, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2592109441757202, "rewards/margins": 2.7320640087127686, "rewards/rejected": -2.472853183746338, "step": 20430 }, { "epoch": 0.82, "learning_rate": 4.898388900971635e-07, "logits/chosen": -3.0032565593719482, "logits/rejected": -3.0325498580932617, "logps/chosen": -10.336950302124023, "logps/rejected": -280.2763977050781, "loss": 0.1555, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21108070015907288, "rewards/margins": 2.6663689613342285, "rewards/rejected": -2.4552886486053467, "step": 20440 }, { "epoch": 0.82, "learning_rate": 4.87765506610215e-07, "logits/chosen": -3.0169677734375, "logits/rejected": -3.0460727214813232, "logps/chosen": -0.9547429084777832, "logps/rejected": -289.529541015625, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": 0.3096189498901367, "rewards/margins": 2.8452911376953125, "rewards/rejected": -2.535672426223755, "step": 20450 }, { "epoch": 0.82, "learning_rate": 4.856960460773766e-07, "logits/chosen": -3.0154805183410645, "logits/rejected": -3.0461387634277344, "logps/chosen": -2.1037354469299316, "logps/rejected": -289.2232971191406, "loss": 0.0726, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2972291111946106, "rewards/margins": 2.833766222000122, "rewards/rejected": -2.536536931991577, "step": 20460 }, { "epoch": 0.82, "learning_rate": 4.836305125331695e-07, "logits/chosen": -3.0006518363952637, "logits/rejected": -3.029578685760498, "logps/chosen": -3.826094388961792, "logps/rejected": -286.6303405761719, "loss": 0.0926, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2782774567604065, "rewards/margins": 2.7906079292297363, "rewards/rejected": -2.5123302936553955, "step": 20470 }, { "epoch": 0.82, "learning_rate": 4.815689100044541e-07, "logits/chosen": -3.0109775066375732, "logits/rejected": -3.0382676124572754, "logps/chosen": -2.92647385597229, "logps/rejected": -288.093505859375, "loss": 0.0819, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28949135541915894, "rewards/margins": 2.812852144241333, "rewards/rejected": -2.5233607292175293, "step": 20480 }, { "epoch": 0.82, "learning_rate": 4.795112425104323e-07, "logits/chosen": -2.9922235012054443, "logits/rejected": -3.024092435836792, "logps/chosen": -0.44903650879859924, "logps/rejected": -290.4204406738281, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3106149137020111, "rewards/margins": 2.8641531467437744, "rewards/rejected": -2.5535383224487305, "step": 20490 }, { "epoch": 0.82, "learning_rate": 4.774575140626317e-07, "logits/chosen": -3.005059242248535, "logits/rejected": -3.0334811210632324, "logps/chosen": -4.194851398468018, "logps/rejected": -287.1059875488281, "loss": 0.0849, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27642983198165894, "rewards/margins": 2.7921812534332275, "rewards/rejected": -2.515751361846924, "step": 20500 }, { "epoch": 0.82, "eval_logits/chosen": -3.070075273513794, "eval_logits/rejected": -3.0959055423736572, "eval_logps/chosen": -0.15471713244915009, "eval_logps/rejected": -284.25421142578125, "eval_loss": 0.06037665531039238, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150523602962494, "eval_rewards/margins": 2.797438859939575, "eval_rewards/rejected": -2.482386350631714, "eval_runtime": 2.5351, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 20500 }, { "epoch": 0.82, "learning_rate": 4.754077286649006e-07, "logits/chosen": -3.0112385749816895, "logits/rejected": -3.0400805473327637, "logps/chosen": -10.212185859680176, "logps/rejected": -280.0523986816406, "loss": 0.1555, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2137465476989746, "rewards/margins": 2.662708282470703, "rewards/rejected": -2.4489619731903076, "step": 20510 }, { "epoch": 0.82, "learning_rate": 4.7336189031340047e-07, "logits/chosen": -3.029726505279541, "logits/rejected": -3.0604958534240723, "logps/chosen": -3.79339861869812, "logps/rejected": -288.7725830078125, "loss": 0.0904, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2781820595264435, "rewards/margins": 2.8119213581085205, "rewards/rejected": -2.5337395668029785, "step": 20520 }, { "epoch": 0.82, "learning_rate": 4.713200029965978e-07, "logits/chosen": -2.9878134727478027, "logits/rejected": -3.0189015865325928, "logps/chosen": -6.252182960510254, "logps/rejected": -285.20074462890625, "loss": 0.113, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25289803743362427, "rewards/margins": 2.7519359588623047, "rewards/rejected": -2.499037981033325, "step": 20530 }, { "epoch": 0.82, "learning_rate": 4.6928207069525695e-07, "logits/chosen": -3.0321450233459473, "logits/rejected": -3.059687376022339, "logps/chosen": -0.200919508934021, "logps/rejected": -292.3267517089844, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 0.31694477796554565, "rewards/margins": 2.885850191116333, "rewards/rejected": -2.5689053535461426, "step": 20540 }, { "epoch": 0.82, "learning_rate": 4.672480973824312e-07, "logits/chosen": -3.01649808883667, "logits/rejected": -3.04693865776062, "logps/chosen": -0.19509461522102356, "logps/rejected": -291.6571044921875, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.3145148456096649, "rewards/margins": 2.876394748687744, "rewards/rejected": -2.5618796348571777, "step": 20550 }, { "epoch": 0.82, "learning_rate": 4.6521808702345516e-07, "logits/chosen": -3.0210459232330322, "logits/rejected": -3.046673536300659, "logps/chosen": -5.645872116088867, "logps/rejected": -282.37469482421875, "loss": 0.1107, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2601853013038635, "rewards/margins": 2.731065511703491, "rewards/rejected": -2.4708800315856934, "step": 20560 }, { "epoch": 0.82, "learning_rate": 4.6319204357593794e-07, "logits/chosen": -3.0166592597961426, "logits/rejected": -3.0448975563049316, "logps/chosen": -5.935582637786865, "logps/rejected": -283.7242431640625, "loss": 0.1128, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25723689794540405, "rewards/margins": 2.737361431121826, "rewards/rejected": -2.4801242351531982, "step": 20570 }, { "epoch": 0.82, "learning_rate": 4.6116997098975465e-07, "logits/chosen": -3.0176141262054443, "logits/rejected": -3.0466129779815674, "logps/chosen": -0.3512941002845764, "logps/rejected": -288.94561767578125, "loss": 0.0578, "rewards/accuracies": 1.0, "rewards/chosen": 0.31145086884498596, "rewards/margins": 2.8507418632507324, "rewards/rejected": -2.5392909049987793, "step": 20580 }, { "epoch": 0.82, "learning_rate": 4.591518732070402e-07, "logits/chosen": -3.030362844467163, "logits/rejected": -3.057173252105713, "logps/chosen": -3.8584914207458496, "logps/rejected": -282.40179443359375, "loss": 0.0941, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.279495507478714, "rewards/margins": 2.751569986343384, "rewards/rejected": -2.472074508666992, "step": 20590 }, { "epoch": 0.82, "learning_rate": 4.5713775416217884e-07, "logits/chosen": -3.0269370079040527, "logits/rejected": -3.0558371543884277, "logps/chosen": -3.972724437713623, "logps/rejected": -284.6208801269531, "loss": 0.092, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27807432413101196, "rewards/margins": 2.7669901847839355, "rewards/rejected": -2.4889156818389893, "step": 20600 }, { "epoch": 0.82, "eval_logits/chosen": -3.0713114738464355, "eval_logits/rejected": -3.0964365005493164, "eval_logps/chosen": -0.20961475372314453, "eval_logps/rejected": -284.53411865234375, "eval_loss": 0.06025111675262451, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31450337171554565, "eval_rewards/margins": 2.7996888160705566, "eval_rewards/rejected": -2.485185146331787, "eval_runtime": 2.5434, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 20600 }, { "epoch": 0.82, "learning_rate": 4.55127617781799e-07, "logits/chosen": -3.0271921157836914, "logits/rejected": -3.054384469985962, "logps/chosen": -7.70320987701416, "logps/rejected": -279.3706970214844, "loss": 0.127, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24239739775657654, "rewards/margins": 2.6777267456054688, "rewards/rejected": -2.4353294372558594, "step": 20610 }, { "epoch": 0.82, "learning_rate": 4.53121467984764e-07, "logits/chosen": -3.009349822998047, "logits/rejected": -3.038490056991577, "logps/chosen": -0.2714509069919586, "logps/rejected": -291.2830505371094, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31307077407836914, "rewards/margins": 2.8723297119140625, "rewards/rejected": -2.5592586994171143, "step": 20620 }, { "epoch": 0.83, "learning_rate": 4.5111930868216525e-07, "logits/chosen": -3.037508726119995, "logits/rejected": -3.0647902488708496, "logps/chosen": -0.19706803560256958, "logps/rejected": -291.48565673828125, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.31761032342910767, "rewards/margins": 2.867745876312256, "rewards/rejected": -2.5501351356506348, "step": 20630 }, { "epoch": 0.83, "learning_rate": 4.491211437773166e-07, "logits/chosen": -2.999473810195923, "logits/rejected": -3.0321357250213623, "logps/chosen": -0.20713195204734802, "logps/rejected": -292.2440185546875, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.3109907805919647, "rewards/margins": 2.8805770874023438, "rewards/rejected": -2.5695860385894775, "step": 20640 }, { "epoch": 0.83, "learning_rate": 4.4712697716573994e-07, "logits/chosen": -3.0164687633514404, "logits/rejected": -3.044679641723633, "logps/chosen": -8.776045799255371, "logps/rejected": -283.14056396484375, "loss": 0.1353, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22948899865150452, "rewards/margins": 2.70810604095459, "rewards/rejected": -2.478616952896118, "step": 20650 }, { "epoch": 0.83, "learning_rate": 4.451368127351674e-07, "logits/chosen": -3.002141237258911, "logits/rejected": -3.034240245819092, "logps/chosen": -0.22051003575325012, "logps/rejected": -291.687255859375, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3143063187599182, "rewards/margins": 2.871943712234497, "rewards/rejected": -2.5576374530792236, "step": 20660 }, { "epoch": 0.83, "learning_rate": 4.431506543655251e-07, "logits/chosen": -3.036818265914917, "logits/rejected": -3.065251111984253, "logps/chosen": -0.7124171257019043, "logps/rejected": -288.29071044921875, "loss": 0.0585, "rewards/accuracies": 1.0, "rewards/chosen": 0.3102453649044037, "rewards/margins": 2.8369836807250977, "rewards/rejected": -2.526738405227661, "step": 20670 }, { "epoch": 0.83, "learning_rate": 4.411685059289314e-07, "logits/chosen": -3.009953022003174, "logits/rejected": -3.0403575897216797, "logps/chosen": -6.056807994842529, "logps/rejected": -285.79876708984375, "loss": 0.11, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2549263536930084, "rewards/margins": 2.760908365249634, "rewards/rejected": -2.505981922149658, "step": 20680 }, { "epoch": 0.83, "learning_rate": 4.391903712896861e-07, "logits/chosen": -3.0330698490142822, "logits/rejected": -3.0625946521759033, "logps/chosen": -0.18550436198711395, "logps/rejected": -291.9772033691406, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3140457272529602, "rewards/margins": 2.8824048042297363, "rewards/rejected": -2.568358898162842, "step": 20690 }, { "epoch": 0.83, "learning_rate": 4.372162543042624e-07, "logits/chosen": -3.0046255588531494, "logits/rejected": -3.0344624519348145, "logps/chosen": -3.321650981903076, "logps/rejected": -286.11322021484375, "loss": 0.0871, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28316518664360046, "rewards/margins": 2.789978504180908, "rewards/rejected": -2.5068135261535645, "step": 20700 }, { "epoch": 0.83, "eval_logits/chosen": -3.069755792617798, "eval_logits/rejected": -3.095733404159546, "eval_logps/chosen": -0.15080204606056213, "eval_logps/rejected": -284.30029296875, "eval_loss": 0.06035115569829941, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31509149074554443, "eval_rewards/margins": 2.797938823699951, "eval_rewards/rejected": -2.482847213745117, "eval_runtime": 2.5415, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 20700 }, { "epoch": 0.83, "learning_rate": 4.352461588213036e-07, "logits/chosen": -3.0116159915924072, "logits/rejected": -3.0437674522399902, "logps/chosen": -0.17735609412193298, "logps/rejected": -291.80194091796875, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.3175176680088043, "rewards/margins": 2.882718563079834, "rewards/rejected": -2.5652008056640625, "step": 20710 }, { "epoch": 0.83, "learning_rate": 4.332800886816113e-07, "logits/chosen": -3.0110397338867188, "logits/rejected": -3.040595293045044, "logps/chosen": -0.1901349574327469, "logps/rejected": -292.6959533691406, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.3156692087650299, "rewards/margins": 2.8883373737335205, "rewards/rejected": -2.5726678371429443, "step": 20720 }, { "epoch": 0.83, "learning_rate": 4.3131804771814083e-07, "logits/chosen": -3.034531354904175, "logits/rejected": -3.0640676021575928, "logps/chosen": -0.19039860367774963, "logps/rejected": -291.2790832519531, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31701168417930603, "rewards/margins": 2.871803045272827, "rewards/rejected": -2.55479097366333, "step": 20730 }, { "epoch": 0.83, "learning_rate": 4.293600397559897e-07, "logits/chosen": -3.023179531097412, "logits/rejected": -3.0511107444763184, "logps/chosen": -1.0506669282913208, "logps/rejected": -291.28533935546875, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.30685344338417053, "rewards/margins": 2.8673417568206787, "rewards/rejected": -2.56048846244812, "step": 20740 }, { "epoch": 0.83, "learning_rate": 4.27406068612396e-07, "logits/chosen": -3.0391669273376465, "logits/rejected": -3.065119981765747, "logps/chosen": -3.169898509979248, "logps/rejected": -286.95306396484375, "loss": 0.0861, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2856668531894684, "rewards/margins": 2.7984890937805176, "rewards/rejected": -2.512821912765503, "step": 20750 }, { "epoch": 0.83, "learning_rate": 4.2545613809672594e-07, "logits/chosen": -2.9892146587371826, "logits/rejected": -3.016603946685791, "logps/chosen": -8.784722328186035, "logps/rejected": -278.89996337890625, "loss": 0.1401, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23010043799877167, "rewards/margins": 2.6675150394439697, "rewards/rejected": -2.4374146461486816, "step": 20760 }, { "epoch": 0.83, "learning_rate": 4.235102520104681e-07, "logits/chosen": -3.003624439239502, "logits/rejected": -3.0331244468688965, "logps/chosen": -3.1925292015075684, "logps/rejected": -284.42742919921875, "loss": 0.087, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.283576101064682, "rewards/margins": 2.775111675262451, "rewards/rejected": -2.491535186767578, "step": 20770 }, { "epoch": 0.83, "learning_rate": 4.2156841414722925e-07, "logits/chosen": -3.0341784954071045, "logits/rejected": -3.0621249675750732, "logps/chosen": -6.829137325286865, "logps/rejected": -282.2137756347656, "loss": 0.1224, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2503601610660553, "rewards/margins": 2.7164063453674316, "rewards/rejected": -2.4660468101501465, "step": 20780 }, { "epoch": 0.83, "learning_rate": 4.196306282927187e-07, "logits/chosen": -3.016141414642334, "logits/rejected": -3.045239210128784, "logps/chosen": -0.52727210521698, "logps/rejected": -287.98907470703125, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": 0.31230711936950684, "rewards/margins": 2.83697772026062, "rewards/rejected": -2.5246706008911133, "step": 20790 }, { "epoch": 0.83, "learning_rate": 4.1769689822475147e-07, "logits/chosen": -3.036452531814575, "logits/rejected": -3.066863775253296, "logps/chosen": -3.830984592437744, "logps/rejected": -287.7242736816406, "loss": 0.0915, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27952349185943604, "rewards/margins": 2.8034708499908447, "rewards/rejected": -2.523947238922119, "step": 20800 }, { "epoch": 0.83, "eval_logits/chosen": -3.0707383155822754, "eval_logits/rejected": -3.0960373878479004, "eval_logps/chosen": -0.17601943016052246, "eval_logps/rejected": -284.400390625, "eval_loss": 0.060304127633571625, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31483933329582214, "eval_rewards/margins": 2.798687696456909, "eval_rewards/rejected": -2.4838483333587646, "eval_runtime": 2.5496, "eval_samples_per_second": 1.961, "eval_steps_per_second": 0.392, "step": 20800 }, { "epoch": 0.83, "learning_rate": 4.1576722771323215e-07, "logits/chosen": -3.036580801010132, "logits/rejected": -3.0670268535614014, "logps/chosen": -3.8826904296875, "logps/rejected": -287.0108337402344, "loss": 0.0919, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27826374769210815, "rewards/margins": 2.7965691089630127, "rewards/rejected": -2.5183053016662598, "step": 20810 }, { "epoch": 0.83, "learning_rate": 4.1384162052015256e-07, "logits/chosen": -3.0139365196228027, "logits/rejected": -3.0425193309783936, "logps/chosen": -7.002201080322266, "logps/rejected": -284.11553955078125, "loss": 0.1229, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2486923485994339, "rewards/margins": 2.7350716590881348, "rewards/rejected": -2.486379384994507, "step": 20820 }, { "epoch": 0.83, "learning_rate": 4.1192008039958236e-07, "logits/chosen": -3.0210540294647217, "logits/rejected": -3.0514016151428223, "logps/chosen": -0.17144832015037537, "logps/rejected": -292.29290771484375, "loss": 0.0547, "rewards/accuracies": 1.0, "rewards/chosen": 0.31375226378440857, "rewards/margins": 2.881943941116333, "rewards/rejected": -2.5681920051574707, "step": 20830 }, { "epoch": 0.83, "learning_rate": 4.1000261109766156e-07, "logits/chosen": -2.9994752407073975, "logits/rejected": -3.030503034591675, "logps/chosen": -0.21149368584156036, "logps/rejected": -293.3470153808594, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": 0.3144281506538391, "rewards/margins": 2.89268159866333, "rewards/rejected": -2.5782535076141357, "step": 20840 }, { "epoch": 0.83, "learning_rate": 4.0808921635259595e-07, "logits/chosen": -3.0134119987487793, "logits/rejected": -3.0403168201446533, "logps/chosen": -9.890581130981445, "logps/rejected": -274.76934814453125, "loss": 0.1551, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21781602501869202, "rewards/margins": 2.6124541759490967, "rewards/rejected": -2.3946382999420166, "step": 20850 }, { "epoch": 0.83, "learning_rate": 4.061798998946459e-07, "logits/chosen": -3.0310134887695312, "logits/rejected": -3.058074712753296, "logps/chosen": -7.163272857666016, "logps/rejected": -281.5877380371094, "loss": 0.1263, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2420860081911087, "rewards/margins": 2.710768938064575, "rewards/rejected": -2.4686827659606934, "step": 20860 }, { "epoch": 0.83, "learning_rate": 4.042746654461216e-07, "logits/chosen": -3.013131856918335, "logits/rejected": -3.0422801971435547, "logps/chosen": -7.5614776611328125, "logps/rejected": -283.3655700683594, "loss": 0.1284, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24047835171222687, "rewards/margins": 2.7206127643585205, "rewards/rejected": -2.4801344871520996, "step": 20870 }, { "epoch": 0.84, "learning_rate": 4.023735167213752e-07, "logits/chosen": -3.015557289123535, "logits/rejected": -3.0434305667877197, "logps/chosen": -5.237832069396973, "logps/rejected": -283.4172058105469, "loss": 0.1042, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2629074156284332, "rewards/margins": 2.748445510864258, "rewards/rejected": -2.4855380058288574, "step": 20880 }, { "epoch": 0.84, "learning_rate": 4.0047645742679275e-07, "logits/chosen": -3.0055575370788574, "logits/rejected": -3.0358006954193115, "logps/chosen": -4.222500324249268, "logps/rejected": -284.39910888671875, "loss": 0.0918, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27258163690567017, "rewards/margins": 2.7650887966156006, "rewards/rejected": -2.492507219314575, "step": 20890 }, { "epoch": 0.84, "learning_rate": 3.9858349126078945e-07, "logits/chosen": -3.0068740844726562, "logits/rejected": -3.0364887714385986, "logps/chosen": -0.2788704037666321, "logps/rejected": -289.4126281738281, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.3155846893787384, "rewards/margins": 2.856574535369873, "rewards/rejected": -2.540990114212036, "step": 20900 }, { "epoch": 0.84, "eval_logits/chosen": -3.068997621536255, "eval_logits/rejected": -3.095078229904175, "eval_logps/chosen": -0.16116522252559662, "eval_logps/rejected": -284.46063232421875, "eval_loss": 0.060241419821977615, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149878680706024, "eval_rewards/margins": 2.799438238143921, "eval_rewards/rejected": -2.484450578689575, "eval_runtime": 2.5391, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 20900 }, { "epoch": 0.84, "learning_rate": 3.9669462191379933e-07, "logits/chosen": -3.0279788970947266, "logits/rejected": -3.055497407913208, "logps/chosen": -0.3394574820995331, "logps/rejected": -287.8210144042969, "loss": 0.0581, "rewards/accuracies": 1.0, "rewards/chosen": 0.3134227991104126, "rewards/margins": 2.835052013397217, "rewards/rejected": -2.5216288566589355, "step": 20910 }, { "epoch": 0.84, "learning_rate": 3.948098530682695e-07, "logits/chosen": -3.0219063758850098, "logits/rejected": -3.049959659576416, "logps/chosen": -10.161858558654785, "logps/rejected": -279.45452880859375, "loss": 0.1535, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2146388590335846, "rewards/margins": 2.6577258110046387, "rewards/rejected": -2.443086862564087, "step": 20920 }, { "epoch": 0.84, "learning_rate": 3.9292918839865355e-07, "logits/chosen": -2.985581159591675, "logits/rejected": -3.0159449577331543, "logps/chosen": -3.908496141433716, "logps/rejected": -286.82281494140625, "loss": 0.0923, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27718400955200195, "rewards/margins": 2.7910923957824707, "rewards/rejected": -2.5139083862304688, "step": 20930 }, { "epoch": 0.84, "learning_rate": 3.9105263157140304e-07, "logits/chosen": -3.0033183097839355, "logits/rejected": -3.036918878555298, "logps/chosen": -0.16855594515800476, "logps/rejected": -288.57122802734375, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.31378594040870667, "rewards/margins": 2.847313404083252, "rewards/rejected": -2.5335278511047363, "step": 20940 }, { "epoch": 0.84, "learning_rate": 3.891801862449629e-07, "logits/chosen": -3.0071465969085693, "logits/rejected": -3.0368690490722656, "logps/chosen": -0.18478640913963318, "logps/rejected": -293.26385498046875, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": 0.3163655698299408, "rewards/margins": 2.893465995788574, "rewards/rejected": -2.5771002769470215, "step": 20950 }, { "epoch": 0.84, "learning_rate": 3.873118560697592e-07, "logits/chosen": -3.022723913192749, "logits/rejected": -3.0520882606506348, "logps/chosen": -6.97763204574585, "logps/rejected": -285.9897155761719, "loss": 0.1213, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24766263365745544, "rewards/margins": 2.757460594177246, "rewards/rejected": -2.5097978115081787, "step": 20960 }, { "epoch": 0.84, "learning_rate": 3.854476446881986e-07, "logits/chosen": -3.044206380844116, "logits/rejected": -3.0700628757476807, "logps/chosen": -2.9073426723480225, "logps/rejected": -288.12213134765625, "loss": 0.0679, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29006606340408325, "rewards/margins": 2.8150696754455566, "rewards/rejected": -2.525003433227539, "step": 20970 }, { "epoch": 0.84, "learning_rate": 3.835875557346552e-07, "logits/chosen": -2.9919614791870117, "logits/rejected": -3.0213332176208496, "logps/chosen": -2.5003812313079834, "logps/rejected": -287.97479248046875, "loss": 0.0766, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29131612181663513, "rewards/margins": 2.8157615661621094, "rewards/rejected": -2.5244457721710205, "step": 20980 }, { "epoch": 0.84, "learning_rate": 3.817315928354695e-07, "logits/chosen": -3.0130982398986816, "logits/rejected": -3.04398775100708, "logps/chosen": -0.19304002821445465, "logps/rejected": -291.7034606933594, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.312950074672699, "rewards/margins": 2.878188371658325, "rewards/rejected": -2.5652384757995605, "step": 20990 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -3.043456554412842, "logits/rejected": -3.072129011154175, "logps/chosen": -0.3068476617336273, "logps/rejected": -292.0617980957031, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148597776889801, "rewards/margins": 2.8781447410583496, "rewards/rejected": -2.5632848739624023, "step": 21000 }, { "epoch": 0.84, "eval_logits/chosen": -3.071096658706665, "eval_logits/rejected": -3.0962817668914795, "eval_logps/chosen": -0.20621223747730255, "eval_logps/rejected": -284.5057067871094, "eval_loss": 0.06025779992341995, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31453737616539, "eval_rewards/margins": 2.799438953399658, "eval_rewards/rejected": -2.4849014282226562, "eval_runtime": 2.5361, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 21000 }, { "epoch": 0.84, "learning_rate": 3.780320596652956e-07, "logits/chosen": -3.020709991455078, "logits/rejected": -3.047283411026001, "logps/chosen": -7.2970452308654785, "logps/rejected": -283.20672607421875, "loss": 0.1257, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24414865672588348, "rewards/margins": 2.7214646339416504, "rewards/rejected": -2.477315902709961, "step": 21010 }, { "epoch": 0.84, "learning_rate": 3.7618849660673693e-07, "logits/chosen": -3.0086586475372314, "logits/rejected": -3.0374085903167725, "logps/chosen": -2.002948760986328, "logps/rejected": -287.8826599121094, "loss": 0.0731, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29774290323257446, "rewards/margins": 2.821223497390747, "rewards/rejected": -2.5234806537628174, "step": 21020 }, { "epoch": 0.84, "learning_rate": 3.7434907402737837e-07, "logits/chosen": -3.0447909832000732, "logits/rejected": -3.073247194290161, "logps/chosen": -3.995145082473755, "logps/rejected": -285.26751708984375, "loss": 0.095, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27445322275161743, "rewards/margins": 2.778780460357666, "rewards/rejected": -2.5043270587921143, "step": 21030 }, { "epoch": 0.84, "learning_rate": 3.725137955132707e-07, "logits/chosen": -3.023117780685425, "logits/rejected": -3.0528557300567627, "logps/chosen": -3.4494800567626953, "logps/rejected": -288.2818298339844, "loss": 0.0875, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28300291299819946, "rewards/margins": 2.81036376953125, "rewards/rejected": -2.527360677719116, "step": 21040 }, { "epoch": 0.84, "learning_rate": 3.7068266464238085e-07, "logits/chosen": -3.0020499229431152, "logits/rejected": -3.0324673652648926, "logps/chosen": -0.18673570454120636, "logps/rejected": -289.7674560546875, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.31495821475982666, "rewards/margins": 2.859011650085449, "rewards/rejected": -2.544053316116333, "step": 21050 }, { "epoch": 0.84, "learning_rate": 3.6885568498459395e-07, "logits/chosen": -2.9962024688720703, "logits/rejected": -3.0291829109191895, "logps/chosen": -5.65508508682251, "logps/rejected": -285.52886962890625, "loss": 0.1083, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2615153193473816, "rewards/margins": 2.759006977081299, "rewards/rejected": -2.4974913597106934, "step": 21060 }, { "epoch": 0.84, "learning_rate": 3.670328601016995e-07, "logits/chosen": -3.003523111343384, "logits/rejected": -3.0341734886169434, "logps/chosen": -0.38898083567619324, "logps/rejected": -291.4787902832031, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.3129435181617737, "rewards/margins": 2.872987747192383, "rewards/rejected": -2.560044288635254, "step": 21070 }, { "epoch": 0.84, "learning_rate": 3.652141935473874e-07, "logits/chosen": -3.026808261871338, "logits/rejected": -3.054168701171875, "logps/chosen": -7.461012840270996, "logps/rejected": -285.7901916503906, "loss": 0.1263, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24432606995105743, "rewards/margins": 2.7482452392578125, "rewards/rejected": -2.5039193630218506, "step": 21080 }, { "epoch": 0.84, "learning_rate": 3.633996888672428e-07, "logits/chosen": -3.0476112365722656, "logits/rejected": -3.074765682220459, "logps/chosen": -0.19783280789852142, "logps/rejected": -290.2942810058594, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.31686919927597046, "rewards/margins": 2.858469009399414, "rewards/rejected": -2.541599988937378, "step": 21090 }, { "epoch": 0.84, "learning_rate": 3.615893495987335e-07, "logits/chosen": -3.01597261428833, "logits/rejected": -3.0438027381896973, "logps/chosen": -10.94372272491455, "logps/rejected": -276.9010314941406, "loss": 0.164, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20689666271209717, "rewards/margins": 2.624314785003662, "rewards/rejected": -2.4174180030822754, "step": 21100 }, { "epoch": 0.84, "eval_logits/chosen": -3.070467233657837, "eval_logits/rejected": -3.096076250076294, "eval_logps/chosen": -0.16613224148750305, "eval_logps/rejected": -284.41558837890625, "eval_loss": 0.060282934457063675, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149382174015045, "eval_rewards/margins": 2.798938274383545, "eval_rewards/rejected": -2.4839999675750732, "eval_runtime": 2.5389, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 21100 }, { "epoch": 0.84, "learning_rate": 3.5978317927120973e-07, "logits/chosen": -3.0182223320007324, "logits/rejected": -3.0473172664642334, "logps/chosen": -0.30995044112205505, "logps/rejected": -288.6157531738281, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": 0.3141062557697296, "rewards/margins": 2.8446154594421387, "rewards/rejected": -2.5305094718933105, "step": 21110 }, { "epoch": 0.84, "learning_rate": 3.579811814058928e-07, "logits/chosen": -3.019412040710449, "logits/rejected": -3.0493946075439453, "logps/chosen": -0.17765077948570251, "logps/rejected": -292.0193786621094, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.31559523940086365, "rewards/margins": 2.8802342414855957, "rewards/rejected": -2.564638376235962, "step": 21120 }, { "epoch": 0.85, "learning_rate": 3.561833595158698e-07, "logits/chosen": -3.026650905609131, "logits/rejected": -3.0549373626708984, "logps/chosen": -0.32977235317230225, "logps/rejected": -287.21392822265625, "loss": 0.0594, "rewards/accuracies": 1.0, "rewards/chosen": 0.31501588225364685, "rewards/margins": 2.8287055492401123, "rewards/rejected": -2.5136897563934326, "step": 21130 }, { "epoch": 0.85, "learning_rate": 3.543897171060862e-07, "logits/chosen": -3.0260977745056152, "logits/rejected": -3.057191848754883, "logps/chosen": -0.25444597005844116, "logps/rejected": -289.58868408203125, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.3151113986968994, "rewards/margins": 2.8540332317352295, "rewards/rejected": -2.53892183303833, "step": 21140 }, { "epoch": 0.85, "learning_rate": 3.5260025767333894e-07, "logits/chosen": -3.0385470390319824, "logits/rejected": -3.0661978721618652, "logps/chosen": -10.552682876586914, "logps/rejected": -278.82562255859375, "loss": 0.1598, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.20971214771270752, "rewards/margins": 2.644686698913574, "rewards/rejected": -2.4349746704101562, "step": 21150 }, { "epoch": 0.85, "learning_rate": 3.508149847062725e-07, "logits/chosen": -3.0115599632263184, "logits/rejected": -3.040239095687866, "logps/chosen": -2.606210231781006, "logps/rejected": -287.6190490722656, "loss": 0.0805, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29061445593833923, "rewards/margins": 2.8145008087158203, "rewards/rejected": -2.5238864421844482, "step": 21160 }, { "epoch": 0.85, "learning_rate": 3.4903390168536666e-07, "logits/chosen": -3.0397045612335205, "logits/rejected": -3.0660758018493652, "logps/chosen": -0.16712966561317444, "logps/rejected": -290.47442626953125, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.31612667441368103, "rewards/margins": 2.864046096801758, "rewards/rejected": -2.547919273376465, "step": 21170 }, { "epoch": 0.85, "learning_rate": 3.472570120829344e-07, "logits/chosen": -3.021071434020996, "logits/rejected": -3.048098564147949, "logps/chosen": -7.090695858001709, "logps/rejected": -283.97381591796875, "loss": 0.1246, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24561433494091034, "rewards/margins": 2.732544183731079, "rewards/rejected": -2.4869301319122314, "step": 21180 }, { "epoch": 0.85, "learning_rate": 3.4548431936311275e-07, "logits/chosen": -3.015993595123291, "logits/rejected": -3.04331636428833, "logps/chosen": -9.664144515991211, "logps/rejected": -281.2149353027344, "loss": 0.1297, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21992573142051697, "rewards/margins": 2.6769473552703857, "rewards/rejected": -2.4570212364196777, "step": 21190 }, { "epoch": 0.85, "learning_rate": 3.4371582698185636e-07, "logits/chosen": -3.019425868988037, "logits/rejected": -3.048121929168701, "logps/chosen": -2.91518497467041, "logps/rejected": -286.93304443359375, "loss": 0.0829, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29138365387916565, "rewards/margins": 2.798521041870117, "rewards/rejected": -2.5071377754211426, "step": 21200 }, { "epoch": 0.85, "eval_logits/chosen": -3.070699691772461, "eval_logits/rejected": -3.0961475372314453, "eval_logps/chosen": -0.20729783177375793, "eval_logps/rejected": -284.3567810058594, "eval_loss": 0.060350775718688965, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3145265281200409, "eval_rewards/margins": 2.797938823699951, "eval_rewards/rejected": -2.483412027359009, "eval_runtime": 2.5429, "eval_samples_per_second": 1.966, "eval_steps_per_second": 0.393, "step": 21200 }, { "epoch": 0.85, "learning_rate": 3.419515383869326e-07, "logits/chosen": -3.003723621368408, "logits/rejected": -3.0359294414520264, "logps/chosen": -0.1880073994398117, "logps/rejected": -291.38116455078125, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153587281703949, "rewards/margins": 2.8717265129089355, "rewards/rejected": -2.556368350982666, "step": 21210 }, { "epoch": 0.85, "learning_rate": 3.4019145701791186e-07, "logits/chosen": -3.0261828899383545, "logits/rejected": -3.0553383827209473, "logps/chosen": -7.035775661468506, "logps/rejected": -278.94219970703125, "loss": 0.1253, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24282100796699524, "rewards/margins": 2.6778688430786133, "rewards/rejected": -2.4350481033325195, "step": 21220 }, { "epoch": 0.85, "learning_rate": 3.3843558630616306e-07, "logits/chosen": -3.023939847946167, "logits/rejected": -3.055215835571289, "logps/chosen": -0.17436029016971588, "logps/rejected": -291.88946533203125, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.3142440915107727, "rewards/margins": 2.878014087677002, "rewards/rejected": -2.563770055770874, "step": 21230 }, { "epoch": 0.85, "learning_rate": 3.3668392967484505e-07, "logits/chosen": -3.0018322467803955, "logits/rejected": -3.032288074493408, "logps/chosen": -0.3154297471046448, "logps/rejected": -292.8220520019531, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 0.31512126326560974, "rewards/margins": 2.8867812156677246, "rewards/rejected": -2.571660041809082, "step": 21240 }, { "epoch": 0.85, "learning_rate": 3.3493649053890325e-07, "logits/chosen": -2.990054130554199, "logits/rejected": -3.0221266746520996, "logps/chosen": -0.22991561889648438, "logps/rejected": -290.75531005859375, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.31314578652381897, "rewards/margins": 2.8686323165893555, "rewards/rejected": -2.5554864406585693, "step": 21250 }, { "epoch": 0.85, "learning_rate": 3.331932723050596e-07, "logits/chosen": -2.993426561355591, "logits/rejected": -3.022345781326294, "logps/chosen": -0.1772424876689911, "logps/rejected": -293.7877197265625, "loss": 0.0539, "rewards/accuracies": 1.0, "rewards/chosen": 0.31572458148002625, "rewards/margins": 2.895103931427002, "rewards/rejected": -2.5793793201446533, "step": 21260 }, { "epoch": 0.85, "learning_rate": 3.314542783718067e-07, "logits/chosen": -3.0491886138916016, "logits/rejected": -3.0777182579040527, "logps/chosen": -2.3560662269592285, "logps/rejected": -286.8943786621094, "loss": 0.072, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.291814386844635, "rewards/margins": 2.808103084564209, "rewards/rejected": -2.5162887573242188, "step": 21270 }, { "epoch": 0.85, "learning_rate": 3.297195121294022e-07, "logits/chosen": -3.0173821449279785, "logits/rejected": -3.044649600982666, "logps/chosen": -6.24945068359375, "logps/rejected": -284.23883056640625, "loss": 0.1153, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2551650404930115, "rewards/margins": 2.7449564933776855, "rewards/rejected": -2.4897913932800293, "step": 21280 }, { "epoch": 0.85, "learning_rate": 3.2798897695986155e-07, "logits/chosen": -3.0119426250457764, "logits/rejected": -3.041243076324463, "logps/chosen": -3.0883047580718994, "logps/rejected": -286.6925354003906, "loss": 0.0848, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.284869909286499, "rewards/margins": 2.796652317047119, "rewards/rejected": -2.51178240776062, "step": 21290 }, { "epoch": 0.85, "learning_rate": 3.262626762369525e-07, "logits/chosen": -2.994311571121216, "logits/rejected": -3.0269250869750977, "logps/chosen": -0.19118982553482056, "logps/rejected": -291.35479736328125, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.31546616554260254, "rewards/margins": 2.875422716140747, "rewards/rejected": -2.5599565505981445, "step": 21300 }, { "epoch": 0.85, "eval_logits/chosen": -3.0706005096435547, "eval_logits/rejected": -3.095447301864624, "eval_logps/chosen": -0.20823800563812256, "eval_logps/rejected": -284.58270263671875, "eval_loss": 0.060209788382053375, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31451717019081116, "eval_rewards/margins": 2.8001885414123535, "eval_rewards/rejected": -2.485671281814575, "eval_runtime": 2.5316, "eval_samples_per_second": 1.975, "eval_steps_per_second": 0.395, "step": 21300 }, { "epoch": 0.85, "learning_rate": 3.245406133261858e-07, "logits/chosen": -2.9847474098205566, "logits/rejected": -3.0172057151794434, "logps/chosen": -3.1018519401550293, "logps/rejected": -287.32489013671875, "loss": 0.0847, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2856610417366028, "rewards/margins": 2.8043320178985596, "rewards/rejected": -2.5186710357666016, "step": 21310 }, { "epoch": 0.85, "learning_rate": 3.228227915848117e-07, "logits/chosen": -3.023066282272339, "logits/rejected": -3.053194761276245, "logps/chosen": -0.29642534255981445, "logps/rejected": -287.66290283203125, "loss": 0.058, "rewards/accuracies": 1.0, "rewards/chosen": 0.3103800415992737, "rewards/margins": 2.8343894481658936, "rewards/rejected": -2.5240092277526855, "step": 21320 }, { "epoch": 0.85, "learning_rate": 3.2110921436181134e-07, "logits/chosen": -3.007486581802368, "logits/rejected": -3.038667917251587, "logps/chosen": -6.33370304107666, "logps/rejected": -283.43121337890625, "loss": 0.1138, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25413578748703003, "rewards/margins": 2.7360548973083496, "rewards/rejected": -2.481919527053833, "step": 21330 }, { "epoch": 0.85, "learning_rate": 3.1939988499789075e-07, "logits/chosen": -3.0113463401794434, "logits/rejected": -3.0413405895233154, "logps/chosen": -2.3877296447753906, "logps/rejected": -283.2085876464844, "loss": 0.0806, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2923418879508972, "rewards/margins": 2.76503849029541, "rewards/rejected": -2.4726967811584473, "step": 21340 }, { "epoch": 0.85, "learning_rate": 3.176948068254762e-07, "logits/chosen": -3.0392231941223145, "logits/rejected": -3.0668482780456543, "logps/chosen": -3.8817718029022217, "logps/rejected": -286.29681396484375, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27875861525535583, "rewards/margins": 2.785736083984375, "rewards/rejected": -2.5069775581359863, "step": 21350 }, { "epoch": 0.85, "learning_rate": 3.159939831687034e-07, "logits/chosen": -3.010075092315674, "logits/rejected": -3.0417308807373047, "logps/chosen": -2.3475050926208496, "logps/rejected": -287.28009033203125, "loss": 0.0674, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2935992479324341, "rewards/margins": 2.8114418983459473, "rewards/rejected": -2.5178420543670654, "step": 21360 }, { "epoch": 0.85, "learning_rate": 3.14297417343416e-07, "logits/chosen": -3.04427433013916, "logits/rejected": -3.072465419769287, "logps/chosen": -0.4768088459968567, "logps/rejected": -286.92181396484375, "loss": 0.0635, "rewards/accuracies": 1.0, "rewards/chosen": 0.3135904371738434, "rewards/margins": 2.8274593353271484, "rewards/rejected": -2.513869285583496, "step": 21370 }, { "epoch": 0.86, "learning_rate": 3.126051126571561e-07, "logits/chosen": -3.030411958694458, "logits/rejected": -3.0600616931915283, "logps/chosen": -0.19185484945774078, "logps/rejected": -292.178955078125, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3146376609802246, "rewards/margins": 2.8830008506774902, "rewards/rejected": -2.5683629512786865, "step": 21380 }, { "epoch": 0.86, "learning_rate": 3.1091707240915704e-07, "logits/chosen": -3.022895336151123, "logits/rejected": -3.04878568649292, "logps/chosen": -13.926281929016113, "logps/rejected": -272.474365234375, "loss": 0.1893, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1773061454296112, "rewards/margins": 2.5506703853607178, "rewards/rejected": -2.3733644485473633, "step": 21390 }, { "epoch": 0.86, "learning_rate": 3.092332998903416e-07, "logits/chosen": -3.0157132148742676, "logits/rejected": -3.046431303024292, "logps/chosen": -2.6492295265197754, "logps/rejected": -287.2140197753906, "loss": 0.0797, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29012465476989746, "rewards/margins": 2.8093113899230957, "rewards/rejected": -2.5191867351531982, "step": 21400 }, { "epoch": 0.86, "eval_logits/chosen": -3.0707812309265137, "eval_logits/rejected": -3.09671950340271, "eval_logps/chosen": -0.17693129181861877, "eval_logps/rejected": -284.40142822265625, "eval_loss": 0.06030380725860596, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148302435874939, "eval_rewards/margins": 2.7986884117126465, "eval_rewards/rejected": -2.483858585357666, "eval_runtime": 2.5412, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 21400 }, { "epoch": 0.86, "learning_rate": 3.0755379838330823e-07, "logits/chosen": -3.0098724365234375, "logits/rejected": -3.0420000553131104, "logps/chosen": -2.5415139198303223, "logps/rejected": -287.7461242675781, "loss": 0.0738, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28887972235679626, "rewards/margins": 2.8175551891326904, "rewards/rejected": -2.5286755561828613, "step": 21410 }, { "epoch": 0.86, "learning_rate": 3.0587857116233274e-07, "logits/chosen": -3.0277061462402344, "logits/rejected": -3.056244373321533, "logps/chosen": -0.18861865997314453, "logps/rejected": -292.86004638671875, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.3162083029747009, "rewards/margins": 2.887316942214966, "rewards/rejected": -2.57110857963562, "step": 21420 }, { "epoch": 0.86, "learning_rate": 3.0420762149335566e-07, "logits/chosen": -3.0186941623687744, "logits/rejected": -3.0468523502349854, "logps/chosen": -5.730774402618408, "logps/rejected": -285.5497741699219, "loss": 0.1099, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2615538239479065, "rewards/margins": 2.7597527503967285, "rewards/rejected": -2.498198986053467, "step": 21430 }, { "epoch": 0.86, "learning_rate": 3.0254095263397925e-07, "logits/chosen": -2.9986748695373535, "logits/rejected": -3.0298099517822266, "logps/chosen": -3.290304183959961, "logps/rejected": -288.3429870605469, "loss": 0.0862, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28073960542678833, "rewards/margins": 2.8145787715911865, "rewards/rejected": -2.5338387489318848, "step": 21440 }, { "epoch": 0.86, "learning_rate": 3.0087856783345916e-07, "logits/chosen": -3.0186047554016113, "logits/rejected": -3.0436160564422607, "logps/chosen": -0.3410149812698364, "logps/rejected": -288.2511291503906, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.3134051561355591, "rewards/margins": 2.8424506187438965, "rewards/rejected": -2.529045581817627, "step": 21450 }, { "epoch": 0.86, "learning_rate": 2.992204703326995e-07, "logits/chosen": -3.018007755279541, "logits/rejected": -3.0485339164733887, "logps/chosen": -0.2151811122894287, "logps/rejected": -292.0896911621094, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3127824366092682, "rewards/margins": 2.8796913623809814, "rewards/rejected": -2.566909074783325, "step": 21460 }, { "epoch": 0.86, "learning_rate": 2.975666633642471e-07, "logits/chosen": -3.02319073677063, "logits/rejected": -3.0522584915161133, "logps/chosen": -0.3144446909427643, "logps/rejected": -290.87689208984375, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153216242790222, "rewards/margins": 2.869562864303589, "rewards/rejected": -2.554241418838501, "step": 21470 }, { "epoch": 0.86, "learning_rate": 2.959171501522828e-07, "logits/chosen": -3.029135227203369, "logits/rejected": -3.0571727752685547, "logps/chosen": -4.035048961639404, "logps/rejected": -285.7845153808594, "loss": 0.0931, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2777000963687897, "rewards/margins": 2.781240940093994, "rewards/rejected": -2.503540515899658, "step": 21480 }, { "epoch": 0.86, "learning_rate": 2.942719339126171e-07, "logits/chosen": -3.0197300910949707, "logits/rejected": -3.048215389251709, "logps/chosen": -5.811244487762451, "logps/rejected": -287.90618896484375, "loss": 0.0962, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.25994357466697693, "rewards/margins": 2.784552812576294, "rewards/rejected": -2.524609088897705, "step": 21490 }, { "epoch": 0.86, "learning_rate": 2.9263101785268253e-07, "logits/chosen": -3.0405282974243164, "logits/rejected": -3.0699353218078613, "logps/chosen": -0.18872876465320587, "logps/rejected": -289.0413513183594, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": 0.3168265223503113, "rewards/margins": 2.8492326736450195, "rewards/rejected": -2.5324063301086426, "step": 21500 }, { "epoch": 0.86, "eval_logits/chosen": -3.0703988075256348, "eval_logits/rejected": -3.095341920852661, "eval_logps/chosen": -0.16658297181129456, "eval_logps/rejected": -284.3410339355469, "eval_loss": 0.06036081165075302, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.314933717250824, "eval_rewards/margins": 2.7981882095336914, "eval_rewards/rejected": -2.4832544326782227, "eval_runtime": 2.5399, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 21500 }, { "epoch": 0.86, "learning_rate": 2.909944051715299e-07, "logits/chosen": -3.0199790000915527, "logits/rejected": -3.049933671951294, "logps/chosen": -3.1420960426330566, "logps/rejected": -287.8752136230469, "loss": 0.0837, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2878536581993103, "rewards/margins": 2.8123788833618164, "rewards/rejected": -2.5245249271392822, "step": 21510 }, { "epoch": 0.86, "learning_rate": 2.893620990598192e-07, "logits/chosen": -3.017996072769165, "logits/rejected": -3.046271562576294, "logps/chosen": -0.2705802917480469, "logps/rejected": -289.79052734375, "loss": 0.0566, "rewards/accuracies": 1.0, "rewards/chosen": 0.31398946046829224, "rewards/margins": 2.8561553955078125, "rewards/rejected": -2.542166233062744, "step": 21520 }, { "epoch": 0.86, "learning_rate": 2.8773410269981454e-07, "logits/chosen": -3.006986379623413, "logits/rejected": -3.036884069442749, "logps/chosen": -0.3728044629096985, "logps/rejected": -290.11334228515625, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31252235174179077, "rewards/margins": 2.8614273071289062, "rewards/rejected": -2.54890513420105, "step": 21530 }, { "epoch": 0.86, "learning_rate": 2.8611041926537796e-07, "logits/chosen": -3.0271809101104736, "logits/rejected": -3.0534164905548096, "logps/chosen": -5.584228515625, "logps/rejected": -285.0545349121094, "loss": 0.0972, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2622888684272766, "rewards/margins": 2.7577881813049316, "rewards/rejected": -2.4954993724823, "step": 21540 }, { "epoch": 0.86, "learning_rate": 2.844910519219632e-07, "logits/chosen": -3.017091989517212, "logits/rejected": -3.0461132526397705, "logps/chosen": -3.849733352661133, "logps/rejected": -286.4741516113281, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2783263027667999, "rewards/margins": 2.7857398986816406, "rewards/rejected": -2.507413148880005, "step": 21550 }, { "epoch": 0.86, "learning_rate": 2.828760038266104e-07, "logits/chosen": -3.0123419761657715, "logits/rejected": -3.0409891605377197, "logps/chosen": -2.618626832962036, "logps/rejected": -289.4635925292969, "loss": 0.0775, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29018160700798035, "rewards/margins": 2.8327383995056152, "rewards/rejected": -2.5425570011138916, "step": 21560 }, { "epoch": 0.86, "learning_rate": 2.812652781279382e-07, "logits/chosen": -3.0160162448883057, "logits/rejected": -3.0412516593933105, "logps/chosen": -0.49200692772865295, "logps/rejected": -290.2848205566406, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.3107617497444153, "rewards/margins": 2.8567264080047607, "rewards/rejected": -2.5459647178649902, "step": 21570 }, { "epoch": 0.86, "learning_rate": 2.796588779661388e-07, "logits/chosen": -3.014768123626709, "logits/rejected": -3.0443921089172363, "logps/chosen": -0.28448885679244995, "logps/rejected": -292.02716064453125, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.3116264045238495, "rewards/margins": 2.8779239654541016, "rewards/rejected": -2.5662975311279297, "step": 21580 }, { "epoch": 0.86, "learning_rate": 2.780568064729716e-07, "logits/chosen": -3.0056443214416504, "logits/rejected": -3.033444881439209, "logps/chosen": -3.778017520904541, "logps/rejected": -287.3467102050781, "loss": 0.0914, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27737921476364136, "rewards/margins": 2.799917459487915, "rewards/rejected": -2.522538185119629, "step": 21590 }, { "epoch": 0.86, "learning_rate": 2.764590667717562e-07, "logits/chosen": -3.028291702270508, "logits/rejected": -3.056978225708008, "logps/chosen": -6.929032802581787, "logps/rejected": -282.7856750488281, "loss": 0.1239, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24932265281677246, "rewards/margins": 2.722283124923706, "rewards/rejected": -2.4729607105255127, "step": 21600 }, { "epoch": 0.86, "eval_logits/chosen": -3.0709950923919678, "eval_logits/rejected": -3.0961387157440186, "eval_logps/chosen": -0.14030200242996216, "eval_logps/rejected": -284.3148498535156, "eval_loss": 0.06034598872065544, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151964843273163, "eval_rewards/margins": 2.798189163208008, "eval_rewards/rejected": -2.482992649078369, "eval_runtime": 2.5402, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 21600 }, { "epoch": 0.86, "learning_rate": 2.748656619773687e-07, "logits/chosen": -3.0273070335388184, "logits/rejected": -3.0553290843963623, "logps/chosen": -3.509154796600342, "logps/rejected": -287.9789733886719, "loss": 0.0887, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28021448850631714, "rewards/margins": 2.8059539794921875, "rewards/rejected": -2.5257396697998047, "step": 21610 }, { "epoch": 0.86, "learning_rate": 2.732765951962335e-07, "logits/chosen": -3.0060606002807617, "logits/rejected": -3.0341246128082275, "logps/chosen": -0.15782994031906128, "logps/rejected": -291.134765625, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31582969427108765, "rewards/margins": 2.871450662612915, "rewards/rejected": -2.5556211471557617, "step": 21620 }, { "epoch": 0.87, "learning_rate": 2.716918695263171e-07, "logits/chosen": -3.025667190551758, "logits/rejected": -3.0529990196228027, "logps/chosen": -5.859775066375732, "logps/rejected": -284.1271667480469, "loss": 0.0995, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2584509253501892, "rewards/margins": 2.745594024658203, "rewards/rejected": -2.487143039703369, "step": 21630 }, { "epoch": 0.87, "learning_rate": 2.701114880571232e-07, "logits/chosen": -3.0111756324768066, "logits/rejected": -3.040910005569458, "logps/chosen": -6.977840423583984, "logps/rejected": -283.3843688964844, "loss": 0.1234, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2482050359249115, "rewards/margins": 2.726500988006592, "rewards/rejected": -2.4782958030700684, "step": 21640 }, { "epoch": 0.87, "learning_rate": 2.6853545386968607e-07, "logits/chosen": -2.9939961433410645, "logits/rejected": -3.0249054431915283, "logps/chosen": -0.19698378443717957, "logps/rejected": -291.1933898925781, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3142628073692322, "rewards/margins": 2.871352195739746, "rewards/rejected": -2.557089328765869, "step": 21650 }, { "epoch": 0.87, "learning_rate": 2.6696377003656654e-07, "logits/chosen": -3.016268491744995, "logits/rejected": -3.044450283050537, "logps/chosen": -0.2732848525047302, "logps/rejected": -291.2684631347656, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3132117688655853, "rewards/margins": 2.869189977645874, "rewards/rejected": -2.555978298187256, "step": 21660 }, { "epoch": 0.87, "learning_rate": 2.653964396218406e-07, "logits/chosen": -3.0125226974487305, "logits/rejected": -3.043731927871704, "logps/chosen": -0.1807314157485962, "logps/rejected": -293.0755310058594, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": 0.3161952495574951, "rewards/margins": 2.8983263969421387, "rewards/rejected": -2.5821313858032227, "step": 21670 }, { "epoch": 0.87, "learning_rate": 2.6383346568110065e-07, "logits/chosen": -3.0318171977996826, "logits/rejected": -3.0605249404907227, "logps/chosen": -0.17176134884357452, "logps/rejected": -291.6150207519531, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31464606523513794, "rewards/margins": 2.873837947845459, "rewards/rejected": -2.559191942214966, "step": 21680 }, { "epoch": 0.87, "learning_rate": 2.622748512614437e-07, "logits/chosen": -3.0130844116210938, "logits/rejected": -3.042750120162964, "logps/chosen": -3.959962844848633, "logps/rejected": -289.0181579589844, "loss": 0.091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27842575311660767, "rewards/margins": 2.8140130043029785, "rewards/rejected": -2.5355875492095947, "step": 21690 }, { "epoch": 0.87, "learning_rate": 2.6072059940146775e-07, "logits/chosen": -3.0105397701263428, "logits/rejected": -3.03889536857605, "logps/chosen": -7.2438459396362305, "logps/rejected": -278.826904296875, "loss": 0.1285, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24225100874900818, "rewards/margins": 2.6782968044281006, "rewards/rejected": -2.4360461235046387, "step": 21700 }, { "epoch": 0.87, "eval_logits/chosen": -3.070042610168457, "eval_logits/rejected": -3.0966217517852783, "eval_logps/chosen": -0.17925992608070374, "eval_logps/rejected": -284.5537414550781, "eval_loss": 0.06022629886865616, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148069381713867, "eval_rewards/margins": 2.8001885414123535, "eval_rewards/rejected": -2.485381603240967, "eval_runtime": 2.5347, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 21700 }, { "epoch": 0.87, "learning_rate": 2.591707131312682e-07, "logits/chosen": -3.0107309818267822, "logits/rejected": -3.0410854816436768, "logps/chosen": -9.888160705566406, "logps/rejected": -279.7221984863281, "loss": 0.1493, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21931703388690948, "rewards/margins": 2.66227650642395, "rewards/rejected": -2.4429595470428467, "step": 21710 }, { "epoch": 0.87, "learning_rate": 2.5762519547242516e-07, "logits/chosen": -3.005415201187134, "logits/rejected": -3.035142421722412, "logps/chosen": -0.18381889164447784, "logps/rejected": -291.3754577636719, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3155983090400696, "rewards/margins": 2.872957229614258, "rewards/rejected": -2.557359218597412, "step": 21720 }, { "epoch": 0.87, "learning_rate": 2.5608404943800627e-07, "logits/chosen": -3.0017192363739014, "logits/rejected": -3.032090425491333, "logps/chosen": -3.160127639770508, "logps/rejected": -287.093017578125, "loss": 0.0856, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28517666459083557, "rewards/margins": 2.7960400581359863, "rewards/rejected": -2.5108630657196045, "step": 21730 }, { "epoch": 0.87, "learning_rate": 2.5454727803255363e-07, "logits/chosen": -2.9970862865448, "logits/rejected": -3.030423402786255, "logps/chosen": -0.26323798298835754, "logps/rejected": -290.57684326171875, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3124222755432129, "rewards/margins": 2.865111827850342, "rewards/rejected": -2.55268931388855, "step": 21740 }, { "epoch": 0.87, "learning_rate": 2.53014884252083e-07, "logits/chosen": -3.01469349861145, "logits/rejected": -3.043736219406128, "logps/chosen": -3.863503932952881, "logps/rejected": -287.66448974609375, "loss": 0.0921, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2782531678676605, "rewards/margins": 2.796201229095459, "rewards/rejected": -2.5179479122161865, "step": 21750 }, { "epoch": 0.87, "learning_rate": 2.514868710840723e-07, "logits/chosen": -3.0068790912628174, "logits/rejected": -3.0363433361053467, "logps/chosen": -3.28534197807312, "logps/rejected": -285.6402587890625, "loss": 0.0868, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2853166460990906, "rewards/margins": 2.7835757732391357, "rewards/rejected": -2.4982593059539795, "step": 21760 }, { "epoch": 0.87, "learning_rate": 2.499632415074635e-07, "logits/chosen": -3.0169241428375244, "logits/rejected": -3.0467538833618164, "logps/chosen": -3.261280059814453, "logps/rejected": -287.83563232421875, "loss": 0.0854, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28505370020866394, "rewards/margins": 2.810883045196533, "rewards/rejected": -2.525829315185547, "step": 21770 }, { "epoch": 0.87, "learning_rate": 2.4844399849264926e-07, "logits/chosen": -3.0317111015319824, "logits/rejected": -3.0601248741149902, "logps/chosen": -3.4340572357177734, "logps/rejected": -289.07427978515625, "loss": 0.0871, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28021320700645447, "rewards/margins": 2.8194971084594727, "rewards/rejected": -2.5392839908599854, "step": 21780 }, { "epoch": 0.87, "learning_rate": 2.4692914500147185e-07, "logits/chosen": -3.0463027954101562, "logits/rejected": -3.071411371231079, "logps/chosen": -2.621708393096924, "logps/rejected": -289.5912170410156, "loss": 0.0785, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2948247790336609, "rewards/margins": 2.8327019214630127, "rewards/rejected": -2.537876844406128, "step": 21790 }, { "epoch": 0.87, "learning_rate": 2.454186839872158e-07, "logits/chosen": -3.040635347366333, "logits/rejected": -3.0677735805511475, "logps/chosen": -0.1964402198791504, "logps/rejected": -290.6318359375, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.31661948561668396, "rewards/margins": 2.868445634841919, "rewards/rejected": -2.551826000213623, "step": 21800 }, { "epoch": 0.87, "eval_logits/chosen": -3.0704104900360107, "eval_logits/rejected": -3.0964839458465576, "eval_logps/chosen": -0.19548380374908447, "eval_logps/rejected": -284.44500732421875, "eval_loss": 0.06028320640325546, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3146446943283081, "eval_rewards/margins": 2.7989392280578613, "eval_rewards/rejected": -2.4842944145202637, "eval_runtime": 2.5354, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 21800 }, { "epoch": 0.87, "learning_rate": 2.4391261839460167e-07, "logits/chosen": -3.007996082305908, "logits/rejected": -3.0383572578430176, "logps/chosen": -0.8153899908065796, "logps/rejected": -286.1347351074219, "loss": 0.0664, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.30558842420578003, "rewards/margins": 2.8143258094787598, "rewards/rejected": -2.508737087249756, "step": 21810 }, { "epoch": 0.87, "learning_rate": 2.424109511597822e-07, "logits/chosen": -3.0154194831848145, "logits/rejected": -3.047375440597534, "logps/chosen": -0.18524058163166046, "logps/rejected": -291.56915283203125, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3151232600212097, "rewards/margins": 2.876516819000244, "rewards/rejected": -2.5613932609558105, "step": 21820 }, { "epoch": 0.87, "learning_rate": 2.409136852103339e-07, "logits/chosen": -2.993436098098755, "logits/rejected": -3.0264928340911865, "logps/chosen": -0.21540656685829163, "logps/rejected": -289.46466064453125, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": 0.3134618401527405, "rewards/margins": 2.8510448932647705, "rewards/rejected": -2.5375828742980957, "step": 21830 }, { "epoch": 0.87, "learning_rate": 2.394208234652534e-07, "logits/chosen": -2.9875571727752686, "logits/rejected": -3.0185112953186035, "logps/chosen": -0.2916143834590912, "logps/rejected": -290.1922912597656, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.3145386874675751, "rewards/margins": 2.8616833686828613, "rewards/rejected": -2.547145128250122, "step": 21840 }, { "epoch": 0.87, "learning_rate": 2.3793236883495164e-07, "logits/chosen": -3.009986162185669, "logits/rejected": -3.0380778312683105, "logps/chosen": -8.852178573608398, "logps/rejected": -279.6834716796875, "loss": 0.1419, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22700592875480652, "rewards/margins": 2.672959566116333, "rewards/rejected": -2.445953845977783, "step": 21850 }, { "epoch": 0.87, "learning_rate": 2.3644832422124565e-07, "logits/chosen": -2.9947943687438965, "logits/rejected": -3.024362564086914, "logps/chosen": -7.641757965087891, "logps/rejected": -280.58087158203125, "loss": 0.1309, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2392328679561615, "rewards/margins": 2.6904330253601074, "rewards/rejected": -2.451200246810913, "step": 21860 }, { "epoch": 0.87, "learning_rate": 2.3496869251735805e-07, "logits/chosen": -3.0115981101989746, "logits/rejected": -3.039865732192993, "logps/chosen": -3.59112811088562, "logps/rejected": -283.4495849609375, "loss": 0.0955, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28109872341156006, "rewards/margins": 2.761702299118042, "rewards/rejected": -2.4806032180786133, "step": 21870 }, { "epoch": 0.88, "learning_rate": 2.3349347660790582e-07, "logits/chosen": -3.019890069961548, "logits/rejected": -3.049506187438965, "logps/chosen": -0.20770354568958282, "logps/rejected": -291.85015869140625, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3185926675796509, "rewards/margins": 2.872810125350952, "rewards/rejected": -2.5542171001434326, "step": 21880 }, { "epoch": 0.88, "learning_rate": 2.320226793688979e-07, "logits/chosen": -3.0370306968688965, "logits/rejected": -3.062647819519043, "logps/chosen": -4.880463600158691, "logps/rejected": -284.7921447753906, "loss": 0.1012, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27062851190567017, "rewards/margins": 2.7599380016326904, "rewards/rejected": -2.489309310913086, "step": 21890 }, { "epoch": 0.88, "learning_rate": 2.3055630366772857e-07, "logits/chosen": -3.0136337280273438, "logits/rejected": -3.04264497756958, "logps/chosen": -4.275749206542969, "logps/rejected": -283.43109130859375, "loss": 0.0999, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27220481634140015, "rewards/margins": 2.7527315616607666, "rewards/rejected": -2.480526924133301, "step": 21900 }, { "epoch": 0.88, "eval_logits/chosen": -3.0707895755767822, "eval_logits/rejected": -3.0962181091308594, "eval_logps/chosen": -0.17161890864372253, "eval_logps/rejected": -284.3460388183594, "eval_loss": 0.06036122888326645, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31488335132598877, "eval_rewards/margins": 2.7981879711151123, "eval_rewards/rejected": -2.483304500579834, "eval_runtime": 2.5343, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 21900 }, { "epoch": 0.88, "learning_rate": 2.2909435236317224e-07, "logits/chosen": -3.008187770843506, "logits/rejected": -3.034719467163086, "logps/chosen": -7.5641608238220215, "logps/rejected": -277.50457763671875, "loss": 0.1341, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24240200221538544, "rewards/margins": 2.6646735668182373, "rewards/rejected": -2.422271251678467, "step": 21910 }, { "epoch": 0.88, "learning_rate": 2.2763682830537814e-07, "logits/chosen": -3.022449493408203, "logits/rejected": -3.050750970840454, "logps/chosen": -0.34275153279304504, "logps/rejected": -288.51934814453125, "loss": 0.0617, "rewards/accuracies": 1.0, "rewards/chosen": 0.31413736939430237, "rewards/margins": 2.8429136276245117, "rewards/rejected": -2.528775930404663, "step": 21920 }, { "epoch": 0.88, "learning_rate": 2.2618373433586415e-07, "logits/chosen": -3.0266311168670654, "logits/rejected": -3.0558338165283203, "logps/chosen": -3.4495627880096436, "logps/rejected": -287.565185546875, "loss": 0.0869, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28309381008148193, "rewards/margins": 2.804410457611084, "rewards/rejected": -2.5213170051574707, "step": 21930 }, { "epoch": 0.88, "learning_rate": 2.2473507328751086e-07, "logits/chosen": -3.0210909843444824, "logits/rejected": -3.0504322052001953, "logps/chosen": -2.951693058013916, "logps/rejected": -287.0989685058594, "loss": 0.0834, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2886459529399872, "rewards/margins": 2.806278944015503, "rewards/rejected": -2.5176329612731934, "step": 21940 }, { "epoch": 0.88, "learning_rate": 2.2329084798455747e-07, "logits/chosen": -3.0431199073791504, "logits/rejected": -3.0716445446014404, "logps/chosen": -0.16892752051353455, "logps/rejected": -291.5997009277344, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31551140546798706, "rewards/margins": 2.87381649017334, "rewards/rejected": -2.558305025100708, "step": 21950 }, { "epoch": 0.88, "learning_rate": 2.2185106124259447e-07, "logits/chosen": -3.0287277698516846, "logits/rejected": -3.0588748455047607, "logps/chosen": -0.21488769352436066, "logps/rejected": -291.3316650390625, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31376904249191284, "rewards/margins": 2.874302387237549, "rewards/rejected": -2.560533285140991, "step": 21960 }, { "epoch": 0.88, "learning_rate": 2.2041571586856104e-07, "logits/chosen": -3.018454074859619, "logits/rejected": -3.0482873916625977, "logps/chosen": -3.839447021484375, "logps/rejected": -286.90460205078125, "loss": 0.092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2805021107196808, "rewards/margins": 2.794036388397217, "rewards/rejected": -2.5135343074798584, "step": 21970 }, { "epoch": 0.88, "learning_rate": 2.1898481466073484e-07, "logits/chosen": -3.0466809272766113, "logits/rejected": -3.074108839035034, "logps/chosen": -2.1299939155578613, "logps/rejected": -285.6206359863281, "loss": 0.0734, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29728710651397705, "rewards/margins": 2.7968242168426514, "rewards/rejected": -2.499537229537964, "step": 21980 }, { "epoch": 0.88, "learning_rate": 2.1755836040873197e-07, "logits/chosen": -3.020798921585083, "logits/rejected": -3.0493550300598145, "logps/chosen": -0.16678589582443237, "logps/rejected": -291.4070129394531, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3152659833431244, "rewards/margins": 2.8719544410705566, "rewards/rejected": -2.5566883087158203, "step": 21990 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -2.999786853790283, "logits/rejected": -3.0296339988708496, "logps/chosen": -2.348482608795166, "logps/rejected": -289.31475830078125, "loss": 0.0704, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2944539189338684, "rewards/margins": 2.830946445465088, "rewards/rejected": -2.536492109298706, "step": 22000 }, { "epoch": 0.88, "eval_logits/chosen": -3.0703442096710205, "eval_logits/rejected": -3.0955429077148438, "eval_logps/chosen": -0.15307357907295227, "eval_logps/rejected": -284.277587890625, "eval_loss": 0.06038760021328926, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.315068781375885, "eval_rewards/margins": 2.7976889610290527, "eval_rewards/rejected": -2.4826202392578125, "eval_runtime": 2.5373, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 22000 }, { "epoch": 0.88, "learning_rate": 2.14718803887303e-07, "logits/chosen": -3.004687786102295, "logits/rejected": -3.0361294746398926, "logps/chosen": -3.9396042823791504, "logps/rejected": -285.43914794921875, "loss": 0.0931, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2769964337348938, "rewards/margins": 2.779550075531006, "rewards/rejected": -2.502553701400757, "step": 22010 }, { "epoch": 0.88, "learning_rate": 2.1330570715373755e-07, "logits/chosen": -3.017360210418701, "logits/rejected": -3.0473270416259766, "logps/chosen": -5.441538333892822, "logps/rejected": -284.8744812011719, "loss": 0.0974, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2617858052253723, "rewards/margins": 2.75956654548645, "rewards/rejected": -2.4977807998657227, "step": 22020 }, { "epoch": 0.88, "learning_rate": 2.118970684477062e-07, "logits/chosen": -3.029873847961426, "logits/rejected": -3.057804584503174, "logps/chosen": -10.617562294006348, "logps/rejected": -276.32415771484375, "loss": 0.1581, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.2121940404176712, "rewards/margins": 2.624248504638672, "rewards/rejected": -2.4120543003082275, "step": 22030 }, { "epoch": 0.88, "learning_rate": 2.1049289051542188e-07, "logits/chosen": -3.0345985889434814, "logits/rejected": -3.0659594535827637, "logps/chosen": -0.16126872599124908, "logps/rejected": -292.90765380859375, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": 0.3129754662513733, "rewards/margins": 2.8923802375793457, "rewards/rejected": -2.5794050693511963, "step": 22040 }, { "epoch": 0.88, "learning_rate": 2.0909317609440093e-07, "logits/chosen": -3.0018744468688965, "logits/rejected": -3.033421039581299, "logps/chosen": -0.4290514886379242, "logps/rejected": -286.7161865234375, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": 0.31270766258239746, "rewards/margins": 2.8277881145477295, "rewards/rejected": -2.515080690383911, "step": 22050 }, { "epoch": 0.88, "learning_rate": 2.0769792791345945e-07, "logits/chosen": -3.0126566886901855, "logits/rejected": -3.042600154876709, "logps/chosen": -3.101954221725464, "logps/rejected": -288.0545349121094, "loss": 0.0842, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2863241136074066, "rewards/margins": 2.814669132232666, "rewards/rejected": -2.5283446311950684, "step": 22060 }, { "epoch": 0.88, "learning_rate": 2.0630714869270347e-07, "logits/chosen": -3.010237216949463, "logits/rejected": -3.040663480758667, "logps/chosen": -0.23755177855491638, "logps/rejected": -291.3509826660156, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31572166085243225, "rewards/margins": 2.8716554641723633, "rewards/rejected": -2.555933713912964, "step": 22070 }, { "epoch": 0.88, "learning_rate": 2.0492084114352967e-07, "logits/chosen": -2.9784231185913086, "logits/rejected": -3.012486696243286, "logps/chosen": -3.8566665649414062, "logps/rejected": -288.4327697753906, "loss": 0.0917, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2787380814552307, "rewards/margins": 2.8045926094055176, "rewards/rejected": -2.5258545875549316, "step": 22080 }, { "epoch": 0.88, "learning_rate": 2.0353900796861503e-07, "logits/chosen": -3.0154454708099365, "logits/rejected": -3.042691469192505, "logps/chosen": -0.27650243043899536, "logps/rejected": -290.7283020019531, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.3136342167854309, "rewards/margins": 2.8664116859436035, "rewards/rejected": -2.552777051925659, "step": 22090 }, { "epoch": 0.88, "learning_rate": 2.0216165186191406e-07, "logits/chosen": -3.0192179679870605, "logits/rejected": -3.0477359294891357, "logps/chosen": -3.721545696258545, "logps/rejected": -288.8820495605469, "loss": 0.0902, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2794826626777649, "rewards/margins": 2.8134963512420654, "rewards/rejected": -2.5340137481689453, "step": 22100 }, { "epoch": 0.88, "eval_logits/chosen": -3.0699801445007324, "eval_logits/rejected": -3.094973087310791, "eval_logps/chosen": -0.18484742939472198, "eval_logps/rejected": -284.3843078613281, "eval_loss": 0.06032426282763481, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147510588169098, "eval_rewards/margins": 2.798438310623169, "eval_rewards/rejected": -2.483687162399292, "eval_runtime": 2.5394, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 22100 }, { "epoch": 0.88, "learning_rate": 2.0078877550865323e-07, "logits/chosen": -3.023698329925537, "logits/rejected": -3.053453207015991, "logps/chosen": -7.360299587249756, "logps/rejected": -282.00714111328125, "loss": 0.1296, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24258503317832947, "rewards/margins": 2.7073729038238525, "rewards/rejected": -2.464787721633911, "step": 22110 }, { "epoch": 0.88, "learning_rate": 1.9942038158532407e-07, "logits/chosen": -3.031290054321289, "logits/rejected": -3.061117649078369, "logps/chosen": -3.829418659210205, "logps/rejected": -287.3141174316406, "loss": 0.0917, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28056859970092773, "rewards/margins": 2.797903537750244, "rewards/rejected": -2.5173346996307373, "step": 22120 }, { "epoch": 0.89, "learning_rate": 1.9805647275968204e-07, "logits/chosen": -3.016963005065918, "logits/rejected": -3.0460572242736816, "logps/chosen": -3.145237445831299, "logps/rejected": -286.31488037109375, "loss": 0.0858, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28360074758529663, "rewards/margins": 2.795072555541992, "rewards/rejected": -2.511471748352051, "step": 22130 }, { "epoch": 0.89, "learning_rate": 1.9669705169073682e-07, "logits/chosen": -3.016552448272705, "logits/rejected": -3.0419020652770996, "logps/chosen": -12.078256607055664, "logps/rejected": -275.17718505859375, "loss": 0.1714, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19651752710342407, "rewards/margins": 2.600914716720581, "rewards/rejected": -2.404397487640381, "step": 22140 }, { "epoch": 0.89, "learning_rate": 1.95342121028749e-07, "logits/chosen": -3.015990734100342, "logits/rejected": -3.0447568893432617, "logps/chosen": -3.916722059249878, "logps/rejected": -282.34466552734375, "loss": 0.0942, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27699780464172363, "rewards/margins": 2.746992349624634, "rewards/rejected": -2.46999454498291, "step": 22150 }, { "epoch": 0.89, "learning_rate": 1.939916834152253e-07, "logits/chosen": -2.999854803085327, "logits/rejected": -3.0329952239990234, "logps/chosen": -0.17939212918281555, "logps/rejected": -292.8359375, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.3162514567375183, "rewards/margins": 2.8881587982177734, "rewards/rejected": -2.5719075202941895, "step": 22160 }, { "epoch": 0.89, "learning_rate": 1.926457414829125e-07, "logits/chosen": -2.9905014038085938, "logits/rejected": -3.0236403942108154, "logps/chosen": -0.311839759349823, "logps/rejected": -290.353759765625, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.3151508867740631, "rewards/margins": 2.864062786102295, "rewards/rejected": -2.5489120483398438, "step": 22170 }, { "epoch": 0.89, "learning_rate": 1.9130429785579441e-07, "logits/chosen": -3.020472764968872, "logits/rejected": -3.0485730171203613, "logps/chosen": -3.694378614425659, "logps/rejected": -287.5798645019531, "loss": 0.0906, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28072160482406616, "rewards/margins": 2.801194429397583, "rewards/rejected": -2.520472526550293, "step": 22180 }, { "epoch": 0.89, "learning_rate": 1.8996735514908327e-07, "logits/chosen": -3.019662857055664, "logits/rejected": -3.0501627922058105, "logps/chosen": -0.19320528209209442, "logps/rejected": -291.74737548828125, "loss": 0.055, "rewards/accuracies": 1.0, "rewards/chosen": 0.31541723012924194, "rewards/margins": 2.8782403469085693, "rewards/rejected": -2.5628228187561035, "step": 22190 }, { "epoch": 0.89, "learning_rate": 1.8863491596921745e-07, "logits/chosen": -3.0069000720977783, "logits/rejected": -3.0347530841827393, "logps/chosen": -2.9184865951538086, "logps/rejected": -288.2479248046875, "loss": 0.0842, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2873879373073578, "rewards/margins": 2.8161845207214355, "rewards/rejected": -2.5287961959838867, "step": 22200 }, { "epoch": 0.89, "eval_logits/chosen": -3.070835590362549, "eval_logits/rejected": -3.0965638160705566, "eval_logps/chosen": -0.17605578899383545, "eval_logps/rejected": -284.50042724609375, "eval_loss": 0.06025145575404167, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148389756679535, "eval_rewards/margins": 2.799687623977661, "eval_rewards/rejected": -2.4848484992980957, "eval_runtime": 2.5386, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 22200 }, { "epoch": 0.89, "learning_rate": 1.8730698291385518e-07, "logits/chosen": -3.021294116973877, "logits/rejected": -3.0517382621765137, "logps/chosen": -6.825006008148193, "logps/rejected": -283.11834716796875, "loss": 0.122, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2483467310667038, "rewards/margins": 2.724630117416382, "rewards/rejected": -2.476283311843872, "step": 22210 }, { "epoch": 0.89, "learning_rate": 1.8598355857186973e-07, "logits/chosen": -3.0222535133361816, "logits/rejected": -3.048811197280884, "logps/chosen": -3.1100244522094727, "logps/rejected": -288.43621826171875, "loss": 0.0834, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28295236825942993, "rewards/margins": 2.8186283111572266, "rewards/rejected": -2.5356762409210205, "step": 22220 }, { "epoch": 0.89, "learning_rate": 1.846646455233453e-07, "logits/chosen": -3.0310349464416504, "logits/rejected": -3.057598829269409, "logps/chosen": -2.9808311462402344, "logps/rejected": -286.1131286621094, "loss": 0.0851, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2866284251213074, "rewards/margins": 2.791182041168213, "rewards/rejected": -2.5045533180236816, "step": 22230 }, { "epoch": 0.89, "learning_rate": 1.8335024633956977e-07, "logits/chosen": -3.015652656555176, "logits/rejected": -3.0437734127044678, "logps/chosen": -6.262088298797607, "logps/rejected": -285.2343444824219, "loss": 0.115, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2543913722038269, "rewards/margins": 2.7554309368133545, "rewards/rejected": -2.501039505004883, "step": 22240 }, { "epoch": 0.89, "learning_rate": 1.8204036358303173e-07, "logits/chosen": -3.0263094902038574, "logits/rejected": -3.0560860633850098, "logps/chosen": -0.18394215404987335, "logps/rejected": -289.3786926269531, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.3148994743824005, "rewards/margins": 2.8495869636535645, "rewards/rejected": -2.5346875190734863, "step": 22250 }, { "epoch": 0.89, "learning_rate": 1.8073499980741426e-07, "logits/chosen": -3.0114026069641113, "logits/rejected": -3.0430662631988525, "logps/chosen": -3.1647439002990723, "logps/rejected": -289.07830810546875, "loss": 0.084, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2834862470626831, "rewards/margins": 2.8211569786071777, "rewards/rejected": -2.537670612335205, "step": 22260 }, { "epoch": 0.89, "learning_rate": 1.7943415755759168e-07, "logits/chosen": -3.0278680324554443, "logits/rejected": -3.0580785274505615, "logps/chosen": -1.4364006519317627, "logps/rejected": -289.2267150878906, "loss": 0.0652, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3049456477165222, "rewards/margins": 2.8377280235290527, "rewards/rejected": -2.5327820777893066, "step": 22270 }, { "epoch": 0.89, "learning_rate": 1.781378393696226e-07, "logits/chosen": -3.023270606994629, "logits/rejected": -3.0493149757385254, "logps/chosen": -3.262986660003662, "logps/rejected": -286.460205078125, "loss": 0.0873, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2854081392288208, "rewards/margins": 2.7884647846221924, "rewards/rejected": -2.503056526184082, "step": 22280 }, { "epoch": 0.89, "learning_rate": 1.7684604777074427e-07, "logits/chosen": -2.9971721172332764, "logits/rejected": -3.0267584323883057, "logps/chosen": -3.156531810760498, "logps/rejected": -285.1638488769531, "loss": 0.0866, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2869463860988617, "rewards/margins": 2.782681941986084, "rewards/rejected": -2.4957354068756104, "step": 22290 }, { "epoch": 0.89, "learning_rate": 1.7555878527937164e-07, "logits/chosen": -3.0303852558135986, "logits/rejected": -3.0571093559265137, "logps/chosen": -3.8574867248535156, "logps/rejected": -288.6773681640625, "loss": 0.091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.277972936630249, "rewards/margins": 2.8102240562438965, "rewards/rejected": -2.5322513580322266, "step": 22300 }, { "epoch": 0.89, "eval_logits/chosen": -3.0710256099700928, "eval_logits/rejected": -3.0961732864379883, "eval_logps/chosen": -0.14752694964408875, "eval_logps/rejected": -284.32196044921875, "eval_loss": 0.06034557893872261, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3151242733001709, "eval_rewards/margins": 2.7981882095336914, "eval_rewards/rejected": -2.4830641746520996, "eval_runtime": 2.5366, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 22300 }, { "epoch": 0.89, "learning_rate": 1.7427605440508837e-07, "logits/chosen": -3.0027120113372803, "logits/rejected": -3.032985210418701, "logps/chosen": -9.83898639678955, "logps/rejected": -278.5379333496094, "loss": 0.1506, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21915800869464874, "rewards/margins": 2.653442144393921, "rewards/rejected": -2.434284210205078, "step": 22310 }, { "epoch": 0.89, "learning_rate": 1.7299785764864434e-07, "logits/chosen": -3.013721466064453, "logits/rejected": -3.0432403087615967, "logps/chosen": -3.1023507118225098, "logps/rejected": -288.1606140136719, "loss": 0.0844, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28683018684387207, "rewards/margins": 2.8106820583343506, "rewards/rejected": -2.5238518714904785, "step": 22320 }, { "epoch": 0.89, "learning_rate": 1.717241975019493e-07, "logits/chosen": -3.0356149673461914, "logits/rejected": -3.0646982192993164, "logps/chosen": -4.104119777679443, "logps/rejected": -286.8938903808594, "loss": 0.0894, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2756999135017395, "rewards/margins": 2.792503833770752, "rewards/rejected": -2.516803741455078, "step": 22330 }, { "epoch": 0.89, "learning_rate": 1.704550764480689e-07, "logits/chosen": -3.0076520442962646, "logits/rejected": -3.035611391067505, "logps/chosen": -6.272554397583008, "logps/rejected": -282.81781005859375, "loss": 0.1168, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2525036931037903, "rewards/margins": 2.728872537612915, "rewards/rejected": -2.4763686656951904, "step": 22340 }, { "epoch": 0.89, "learning_rate": 1.6919049696121957e-07, "logits/chosen": -2.9953742027282715, "logits/rejected": -3.023801326751709, "logps/chosen": -2.828618049621582, "logps/rejected": -286.053466796875, "loss": 0.0817, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2877464294433594, "rewards/margins": 2.792900800704956, "rewards/rejected": -2.5051543712615967, "step": 22350 }, { "epoch": 0.89, "learning_rate": 1.679304615067634e-07, "logits/chosen": -3.0300378799438477, "logits/rejected": -3.058952808380127, "logps/chosen": -0.2698937654495239, "logps/rejected": -290.2510681152344, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.31571823358535767, "rewards/margins": 2.862407684326172, "rewards/rejected": -2.54668927192688, "step": 22360 }, { "epoch": 0.89, "learning_rate": 1.6667497254120506e-07, "logits/chosen": -3.0198018550872803, "logits/rejected": -3.0489115715026855, "logps/chosen": -3.0793840885162354, "logps/rejected": -288.2093811035156, "loss": 0.083, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2891962230205536, "rewards/margins": 2.8171744346618652, "rewards/rejected": -2.52797794342041, "step": 22370 }, { "epoch": 0.9, "learning_rate": 1.6542403251218308e-07, "logits/chosen": -3.0072569847106934, "logits/rejected": -3.035095691680908, "logps/chosen": -12.365407943725586, "logps/rejected": -277.8640441894531, "loss": 0.1748, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.19336764514446259, "rewards/margins": 2.618320941925049, "rewards/rejected": -2.424952983856201, "step": 22380 }, { "epoch": 0.9, "learning_rate": 1.6417764385846996e-07, "logits/chosen": -3.0105817317962646, "logits/rejected": -3.037363052368164, "logps/chosen": -7.593419075012207, "logps/rejected": -279.34051513671875, "loss": 0.1284, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24095647037029266, "rewards/margins": 2.6803619861602783, "rewards/rejected": -2.4394052028656006, "step": 22390 }, { "epoch": 0.9, "learning_rate": 1.629358090099639e-07, "logits/chosen": -3.043687343597412, "logits/rejected": -3.0711119174957275, "logps/chosen": -1.2177555561065674, "logps/rejected": -289.3421325683594, "loss": 0.0612, "rewards/accuracies": 1.0, "rewards/chosen": 0.3050677478313446, "rewards/margins": 2.8422484397888184, "rewards/rejected": -2.5371804237365723, "step": 22400 }, { "epoch": 0.9, "eval_logits/chosen": -3.071587085723877, "eval_logits/rejected": -3.0968894958496094, "eval_logps/chosen": -0.17128124833106995, "eval_logps/rejected": -284.2958068847656, "eval_loss": 0.0603872649371624, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148867189884186, "eval_rewards/margins": 2.7976889610290527, "eval_rewards/rejected": -2.482802391052246, "eval_runtime": 2.5418, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 22400 }, { "epoch": 0.9, "learning_rate": 1.6169853038768585e-07, "logits/chosen": -2.9764554500579834, "logits/rejected": -3.0094261169433594, "logps/chosen": -0.18952010571956635, "logps/rejected": -290.6593322753906, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3131345808506012, "rewards/margins": 2.865954637527466, "rewards/rejected": -2.5528197288513184, "step": 22410 }, { "epoch": 0.9, "learning_rate": 1.6046581040377317e-07, "logits/chosen": -3.021721363067627, "logits/rejected": -3.0526351928710938, "logps/chosen": -0.1989537924528122, "logps/rejected": -291.38836669921875, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.31527119874954224, "rewards/margins": 2.875521183013916, "rewards/rejected": -2.5602500438690186, "step": 22420 }, { "epoch": 0.9, "learning_rate": 1.5923765146147656e-07, "logits/chosen": -3.016993522644043, "logits/rejected": -3.0463709831237793, "logps/chosen": -8.804555892944336, "logps/rejected": -277.10418701171875, "loss": 0.1441, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23027610778808594, "rewards/margins": 2.6460978984832764, "rewards/rejected": -2.4158220291137695, "step": 22430 }, { "epoch": 0.9, "learning_rate": 1.5801405595515545e-07, "logits/chosen": -3.023838520050049, "logits/rejected": -3.050332546234131, "logps/chosen": -1.9149307012557983, "logps/rejected": -284.3644104003906, "loss": 0.0666, "rewards/accuracies": 1.0, "rewards/chosen": 0.3015586733818054, "rewards/margins": 2.7849180698394775, "rewards/rejected": -2.4833590984344482, "step": 22440 }, { "epoch": 0.9, "learning_rate": 1.567950262702714e-07, "logits/chosen": -3.0126242637634277, "logits/rejected": -3.0422523021698, "logps/chosen": -3.5187621116638184, "logps/rejected": -287.97216796875, "loss": 0.0876, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2823764383792877, "rewards/margins": 2.8056297302246094, "rewards/rejected": -2.5232529640197754, "step": 22450 }, { "epoch": 0.9, "learning_rate": 1.5558056478338523e-07, "logits/chosen": -3.0149741172790527, "logits/rejected": -3.0424811840057373, "logps/chosen": -3.829007625579834, "logps/rejected": -285.58734130859375, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.281181275844574, "rewards/margins": 2.7811179161071777, "rewards/rejected": -2.499937057495117, "step": 22460 }, { "epoch": 0.9, "learning_rate": 1.5437067386215172e-07, "logits/chosen": -2.9920401573181152, "logits/rejected": -3.021282196044922, "logps/chosen": -6.930137634277344, "logps/rejected": -282.6788024902344, "loss": 0.1264, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24585285782814026, "rewards/margins": 2.717242479324341, "rewards/rejected": -2.4713892936706543, "step": 22470 }, { "epoch": 0.9, "learning_rate": 1.5316535586531483e-07, "logits/chosen": -3.0271904468536377, "logits/rejected": -3.0566468238830566, "logps/chosen": -5.774171352386475, "logps/rejected": -281.85247802734375, "loss": 0.1044, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25931817293167114, "rewards/margins": 2.7217857837677, "rewards/rejected": -2.462467670440674, "step": 22480 }, { "epoch": 0.9, "learning_rate": 1.5196461314270438e-07, "logits/chosen": -3.007305145263672, "logits/rejected": -3.0390548706054688, "logps/chosen": -3.303436756134033, "logps/rejected": -285.28338623046875, "loss": 0.0882, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28302258253097534, "rewards/margins": 2.779331922531128, "rewards/rejected": -2.496309280395508, "step": 22490 }, { "epoch": 0.9, "learning_rate": 1.507684480352292e-07, "logits/chosen": -3.024315357208252, "logits/rejected": -3.0533385276794434, "logps/chosen": -0.1941853016614914, "logps/rejected": -290.984375, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31636396050453186, "rewards/margins": 2.869879961013794, "rewards/rejected": -2.553515911102295, "step": 22500 }, { "epoch": 0.9, "eval_logits/chosen": -3.070605516433716, "eval_logits/rejected": -3.0965304374694824, "eval_logps/chosen": -0.17852500081062317, "eval_logps/rejected": -284.4779357910156, "eval_loss": 0.060304511338472366, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148142695426941, "eval_rewards/margins": 2.7994377613067627, "eval_rewards/rejected": -2.484623432159424, "eval_runtime": 2.5396, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 22500 }, { "epoch": 0.9, "learning_rate": 1.495768628748745e-07, "logits/chosen": -3.0218453407287598, "logits/rejected": -3.048736095428467, "logps/chosen": -0.14829587936401367, "logps/rejected": -290.9877014160156, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.31533706188201904, "rewards/margins": 2.869701623916626, "rewards/rejected": -2.5543646812438965, "step": 22510 }, { "epoch": 0.9, "learning_rate": 1.483898599846964e-07, "logits/chosen": -3.0455667972564697, "logits/rejected": -3.0726630687713623, "logps/chosen": -0.1983906328678131, "logps/rejected": -291.0821228027344, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3142072260379791, "rewards/margins": 2.8689169883728027, "rewards/rejected": -2.5547096729278564, "step": 22520 }, { "epoch": 0.9, "learning_rate": 1.4720744167881828e-07, "logits/chosen": -3.017357349395752, "logits/rejected": -3.0476503372192383, "logps/chosen": -0.19421811401844025, "logps/rejected": -293.0829162597656, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.316925048828125, "rewards/margins": 2.8896846771240234, "rewards/rejected": -2.5727598667144775, "step": 22530 }, { "epoch": 0.9, "learning_rate": 1.460296102624248e-07, "logits/chosen": -3.022292375564575, "logits/rejected": -3.0546491146087646, "logps/chosen": -0.19528868794441223, "logps/rejected": -289.21099853515625, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": 0.3142591416835785, "rewards/margins": 2.85215425491333, "rewards/rejected": -2.5378949642181396, "step": 22540 }, { "epoch": 0.9, "learning_rate": 1.4485636803175828e-07, "logits/chosen": -3.02791166305542, "logits/rejected": -3.0558695793151855, "logps/chosen": -1.7697381973266602, "logps/rejected": -286.6495056152344, "loss": 0.0735, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29700881242752075, "rewards/margins": 2.8113532066345215, "rewards/rejected": -2.5143439769744873, "step": 22550 }, { "epoch": 0.9, "learning_rate": 1.4368771727411496e-07, "logits/chosen": -2.999399185180664, "logits/rejected": -3.030857801437378, "logps/chosen": -0.1751411259174347, "logps/rejected": -291.25592041015625, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3171786665916443, "rewards/margins": 2.8725457191467285, "rewards/rejected": -2.5553669929504395, "step": 22560 }, { "epoch": 0.9, "learning_rate": 1.425236602678387e-07, "logits/chosen": -3.020651340484619, "logits/rejected": -3.0507683753967285, "logps/chosen": -3.8400890827178955, "logps/rejected": -287.63250732421875, "loss": 0.0911, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2777814567089081, "rewards/margins": 2.8029165267944336, "rewards/rejected": -2.525135040283203, "step": 22570 }, { "epoch": 0.9, "learning_rate": 1.4136419928231892e-07, "logits/chosen": -3.01467227935791, "logits/rejected": -3.04586124420166, "logps/chosen": -3.471571683883667, "logps/rejected": -283.95025634765625, "loss": 0.0886, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2820015251636505, "rewards/margins": 2.77006459236145, "rewards/rejected": -2.488062620162964, "step": 22580 }, { "epoch": 0.9, "learning_rate": 1.4020933657798385e-07, "logits/chosen": -3.0380334854125977, "logits/rejected": -3.0660743713378906, "logps/chosen": -3.8263556957244873, "logps/rejected": -287.6697082519531, "loss": 0.0914, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27950596809387207, "rewards/margins": 2.7971808910369873, "rewards/rejected": -2.5176749229431152, "step": 22590 }, { "epoch": 0.9, "learning_rate": 1.3905907440629752e-07, "logits/chosen": -3.0170862674713135, "logits/rejected": -3.0458643436431885, "logps/chosen": -8.176836967468262, "logps/rejected": -280.4187316894531, "loss": 0.1258, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.23409882187843323, "rewards/margins": 2.6873698234558105, "rewards/rejected": -2.45327091217041, "step": 22600 }, { "epoch": 0.9, "eval_logits/chosen": -3.070443630218506, "eval_logits/rejected": -3.0963385105133057, "eval_logps/chosen": -0.17200790345668793, "eval_logps/rejected": -284.54656982421875, "eval_loss": 0.0602252371609211, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.314879447221756, "eval_rewards/margins": 2.800189256668091, "eval_rewards/rejected": -2.4853100776672363, "eval_runtime": 2.5491, "eval_samples_per_second": 1.961, "eval_steps_per_second": 0.392, "step": 22600 }, { "epoch": 0.9, "learning_rate": 1.379134150097547e-07, "logits/chosen": -3.0244574546813965, "logits/rejected": -3.055354356765747, "logps/chosen": -4.27706241607666, "logps/rejected": -288.2788391113281, "loss": 0.0861, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2749953866004944, "rewards/margins": 2.8038904666900635, "rewards/rejected": -2.5288949012756348, "step": 22610 }, { "epoch": 0.9, "learning_rate": 1.3677236062187654e-07, "logits/chosen": -3.002315044403076, "logits/rejected": -3.029919147491455, "logps/chosen": -6.51849889755249, "logps/rejected": -282.3607482910156, "loss": 0.1197, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25264573097229004, "rewards/margins": 2.7180991172790527, "rewards/rejected": -2.465453624725342, "step": 22620 }, { "epoch": 0.91, "learning_rate": 1.3563591346720806e-07, "logits/chosen": -2.9950127601623535, "logits/rejected": -3.0242390632629395, "logps/chosen": -3.2210114002227783, "logps/rejected": -288.29290771484375, "loss": 0.0838, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.286202996969223, "rewards/margins": 2.812091827392578, "rewards/rejected": -2.5258889198303223, "step": 22630 }, { "epoch": 0.91, "learning_rate": 1.345040757613103e-07, "logits/chosen": -3.0136666297912598, "logits/rejected": -3.044881582260132, "logps/chosen": -2.8804266452789307, "logps/rejected": -286.5202331542969, "loss": 0.0825, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2862790524959564, "rewards/margins": 2.8011364936828613, "rewards/rejected": -2.514857292175293, "step": 22640 }, { "epoch": 0.91, "learning_rate": 1.3337684971075932e-07, "logits/chosen": -3.003373622894287, "logits/rejected": -3.035926103591919, "logps/chosen": -3.2422237396240234, "logps/rejected": -288.0936279296875, "loss": 0.084, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2826620936393738, "rewards/margins": 2.8085386753082275, "rewards/rejected": -2.525876522064209, "step": 22650 }, { "epoch": 0.91, "learning_rate": 1.3225423751313942e-07, "logits/chosen": -3.0377652645111084, "logits/rejected": -3.0679969787597656, "logps/chosen": -3.1988298892974854, "logps/rejected": -285.6497497558594, "loss": 0.085, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28594645857810974, "rewards/margins": 2.7849581241607666, "rewards/rejected": -2.499011754989624, "step": 22660 }, { "epoch": 0.91, "learning_rate": 1.311362413570408e-07, "logits/chosen": -3.0253729820251465, "logits/rejected": -3.0505120754241943, "logps/chosen": -12.893452644348145, "logps/rejected": -278.5771789550781, "loss": 0.1804, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.188338041305542, "rewards/margins": 2.6212265491485596, "rewards/rejected": -2.4328885078430176, "step": 22670 }, { "epoch": 0.91, "learning_rate": 1.300228634220546e-07, "logits/chosen": -2.978994846343994, "logits/rejected": -3.011591911315918, "logps/chosen": -6.130929470062256, "logps/rejected": -283.5531921386719, "loss": 0.1126, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25442811846733093, "rewards/margins": 2.738564968109131, "rewards/rejected": -2.4841370582580566, "step": 22680 }, { "epoch": 0.91, "learning_rate": 1.2891410587876714e-07, "logits/chosen": -3.0079166889190674, "logits/rejected": -3.0346055030822754, "logps/chosen": -14.501592636108398, "logps/rejected": -274.71905517578125, "loss": 0.1988, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.17123597860336304, "rewards/margins": 2.5652568340301514, "rewards/rejected": -2.3940207958221436, "step": 22690 }, { "epoch": 0.91, "learning_rate": 1.278099708887587e-07, "logits/chosen": -3.0163960456848145, "logits/rejected": -3.0467422008514404, "logps/chosen": -3.70232892036438, "logps/rejected": -287.42218017578125, "loss": 0.091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2779483199119568, "rewards/margins": 2.800114154815674, "rewards/rejected": -2.5221660137176514, "step": 22700 }, { "epoch": 0.91, "eval_logits/chosen": -3.0708885192871094, "eval_logits/rejected": -3.09663462638855, "eval_logps/chosen": -0.16260509192943573, "eval_logps/rejected": -284.28704833984375, "eval_loss": 0.06037154793739319, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31497350335121155, "eval_rewards/margins": 2.7976880073547363, "eval_rewards/rejected": -2.482714891433716, "eval_runtime": 2.5363, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 22700 }, { "epoch": 0.91, "learning_rate": 1.2671046060459685e-07, "logits/chosen": -3.0284976959228516, "logits/rejected": -3.0561766624450684, "logps/chosen": -0.21153683960437775, "logps/rejected": -290.4297180175781, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.31457096338272095, "rewards/margins": 2.863022804260254, "rewards/rejected": -2.5484519004821777, "step": 22710 }, { "epoch": 0.91, "learning_rate": 1.2561557716983308e-07, "logits/chosen": -3.012664318084717, "logits/rejected": -3.042249917984009, "logps/chosen": -3.180988311767578, "logps/rejected": -285.80780029296875, "loss": 0.0848, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2825668752193451, "rewards/margins": 2.791029214859009, "rewards/rejected": -2.5084621906280518, "step": 22720 }, { "epoch": 0.91, "learning_rate": 1.2452532271899853e-07, "logits/chosen": -3.048030376434326, "logits/rejected": -3.077200412750244, "logps/chosen": -2.519275188446045, "logps/rejected": -289.94659423828125, "loss": 0.0663, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2931092381477356, "rewards/margins": 2.835845708847046, "rewards/rejected": -2.542736768722534, "step": 22730 }, { "epoch": 0.91, "learning_rate": 1.2343969937759992e-07, "logits/chosen": -3.0319714546203613, "logits/rejected": -3.0599451065063477, "logps/chosen": -0.21628041565418243, "logps/rejected": -290.6517639160156, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153747320175171, "rewards/margins": 2.8658862113952637, "rewards/rejected": -2.550511598587036, "step": 22740 }, { "epoch": 0.91, "learning_rate": 1.223587092621162e-07, "logits/chosen": -3.0134897232055664, "logits/rejected": -3.0438685417175293, "logps/chosen": -0.18700259923934937, "logps/rejected": -291.57867431640625, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31475773453712463, "rewards/margins": 2.8705499172210693, "rewards/rejected": -2.5557923316955566, "step": 22750 }, { "epoch": 0.91, "learning_rate": 1.2128235447999181e-07, "logits/chosen": -3.024871826171875, "logits/rejected": -3.0515356063842773, "logps/chosen": -0.6098474264144897, "logps/rejected": -289.90423583984375, "loss": 0.0582, "rewards/accuracies": 1.0, "rewards/chosen": 0.3119294047355652, "rewards/margins": 2.8547654151916504, "rewards/rejected": -2.5428357124328613, "step": 22760 }, { "epoch": 0.91, "learning_rate": 1.2021063712963715e-07, "logits/chosen": -3.0571112632751465, "logits/rejected": -3.083038330078125, "logps/chosen": -10.335386276245117, "logps/rejected": -278.37396240234375, "loss": 0.1573, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21347343921661377, "rewards/margins": 2.6454837322235107, "rewards/rejected": -2.4320101737976074, "step": 22770 }, { "epoch": 0.91, "learning_rate": 1.1914355930041838e-07, "logits/chosen": -3.0159788131713867, "logits/rejected": -3.044956684112549, "logps/chosen": -0.30940112471580505, "logps/rejected": -290.05889892578125, "loss": 0.0565, "rewards/accuracies": 1.0, "rewards/chosen": 0.3127928376197815, "rewards/margins": 2.857363224029541, "rewards/rejected": -2.5445704460144043, "step": 22780 }, { "epoch": 0.91, "learning_rate": 1.180811230726589e-07, "logits/chosen": -3.0184648036956787, "logits/rejected": -3.048469066619873, "logps/chosen": -5.907876014709473, "logps/rejected": -285.5011901855469, "loss": 0.1117, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25892236828804016, "rewards/margins": 2.7566094398498535, "rewards/rejected": -2.4976868629455566, "step": 22790 }, { "epoch": 0.91, "learning_rate": 1.1702333051763271e-07, "logits/chosen": -3.008793592453003, "logits/rejected": -3.0386810302734375, "logps/chosen": -4.015249729156494, "logps/rejected": -284.8692321777344, "loss": 0.0939, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27795082330703735, "rewards/margins": 2.7698235511779785, "rewards/rejected": -2.491872787475586, "step": 22800 }, { "epoch": 0.91, "eval_logits/chosen": -3.070729970932007, "eval_logits/rejected": -3.096435785293579, "eval_logps/chosen": -0.1687033474445343, "eval_logps/rejected": -284.2431640625, "eval_loss": 0.06039755418896675, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31491249799728394, "eval_rewards/margins": 2.7971885204315186, "eval_rewards/rejected": -2.48227596282959, "eval_runtime": 2.5381, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 22800 }, { "epoch": 0.91, "learning_rate": 1.159701836975602e-07, "logits/chosen": -2.9942939281463623, "logits/rejected": -3.022256374359131, "logps/chosen": -9.105826377868652, "logps/rejected": -281.8519287109375, "loss": 0.1421, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22863879799842834, "rewards/margins": 2.6902263164520264, "rewards/rejected": -2.461587429046631, "step": 22810 }, { "epoch": 0.91, "learning_rate": 1.1492168466560538e-07, "logits/chosen": -3.0274181365966797, "logits/rejected": -3.057366132736206, "logps/chosen": -0.34510236978530884, "logps/rejected": -287.1620788574219, "loss": 0.0589, "rewards/accuracies": 1.0, "rewards/chosen": 0.31520482897758484, "rewards/margins": 2.8313441276550293, "rewards/rejected": -2.516139507293701, "step": 22820 }, { "epoch": 0.91, "learning_rate": 1.1387783546587011e-07, "logits/chosen": -3.031829357147217, "logits/rejected": -3.0582451820373535, "logps/chosen": -5.667557716369629, "logps/rejected": -285.76458740234375, "loss": 0.0982, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26107192039489746, "rewards/margins": 2.76344633102417, "rewards/rejected": -2.5023741722106934, "step": 22830 }, { "epoch": 0.91, "learning_rate": 1.1283863813339263e-07, "logits/chosen": -3.0126893520355225, "logits/rejected": -3.041861057281494, "logps/chosen": -3.654362440109253, "logps/rejected": -287.23321533203125, "loss": 0.0889, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2823767066001892, "rewards/margins": 2.7983760833740234, "rewards/rejected": -2.5159990787506104, "step": 22840 }, { "epoch": 0.91, "learning_rate": 1.1180409469414094e-07, "logits/chosen": -3.027944326400757, "logits/rejected": -3.052715539932251, "logps/chosen": -9.436028480529785, "logps/rejected": -280.9776306152344, "loss": 0.1341, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22170230746269226, "rewards/margins": 2.679924488067627, "rewards/rejected": -2.458221912384033, "step": 22850 }, { "epoch": 0.91, "learning_rate": 1.1077420716501031e-07, "logits/chosen": -3.043311595916748, "logits/rejected": -3.0692572593688965, "logps/chosen": -0.17908911406993866, "logps/rejected": -290.8965759277344, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 0.3171611428260803, "rewards/margins": 2.868079423904419, "rewards/rejected": -2.5509181022644043, "step": 22860 }, { "epoch": 0.91, "learning_rate": 1.0974897755381936e-07, "logits/chosen": -3.0184783935546875, "logits/rejected": -3.0485305786132812, "logps/chosen": -6.285985946655273, "logps/rejected": -282.1624755859375, "loss": 0.1215, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2528327405452728, "rewards/margins": 2.7247447967529297, "rewards/rejected": -2.471912145614624, "step": 22870 }, { "epoch": 0.92, "learning_rate": 1.087284078593051e-07, "logits/chosen": -3.0045971870422363, "logits/rejected": -3.036738872528076, "logps/chosen": -0.19190986454486847, "logps/rejected": -291.68359375, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.31704097986221313, "rewards/margins": 2.87631893157959, "rewards/rejected": -2.5592777729034424, "step": 22880 }, { "epoch": 0.92, "learning_rate": 1.0771250007112155e-07, "logits/chosen": -3.0366721153259277, "logits/rejected": -3.0640430450439453, "logps/chosen": -8.180059432983398, "logps/rejected": -280.8228759765625, "loss": 0.1355, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23714232444763184, "rewards/margins": 2.691737651824951, "rewards/rejected": -2.4545950889587402, "step": 22890 }, { "epoch": 0.92, "learning_rate": 1.067012561698319e-07, "logits/chosen": -3.0147311687469482, "logits/rejected": -3.044553518295288, "logps/chosen": -0.26309287548065186, "logps/rejected": -288.7773742675781, "loss": 0.057, "rewards/accuracies": 1.0, "rewards/chosen": 0.3133823573589325, "rewards/margins": 2.8479156494140625, "rewards/rejected": -2.5345335006713867, "step": 22900 }, { "epoch": 0.92, "eval_logits/chosen": -3.071131706237793, "eval_logits/rejected": -3.097275733947754, "eval_logps/chosen": -0.17709887027740479, "eval_logps/rejected": -284.3765869140625, "eval_loss": 0.060340188443660736, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3148285448551178, "eval_rewards/margins": 2.798438549041748, "eval_rewards/rejected": -2.483610153198242, "eval_runtime": 2.5348, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 22900 }, { "epoch": 0.92, "learning_rate": 1.0569467812690832e-07, "logits/chosen": -2.990438938140869, "logits/rejected": -3.020517587661743, "logps/chosen": -7.34426736831665, "logps/rejected": -283.54071044921875, "loss": 0.1267, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2441934049129486, "rewards/margins": 2.7263731956481934, "rewards/rejected": -2.482179641723633, "step": 22910 }, { "epoch": 0.92, "learning_rate": 1.0469276790472604e-07, "logits/chosen": -3.024697780609131, "logits/rejected": -3.052950620651245, "logps/chosen": -6.9980292320251465, "logps/rejected": -283.162841796875, "loss": 0.1251, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24830901622772217, "rewards/margins": 2.7271835803985596, "rewards/rejected": -2.478874683380127, "step": 22920 }, { "epoch": 0.92, "learning_rate": 1.0369552745656014e-07, "logits/chosen": -3.0212597846984863, "logits/rejected": -3.052022933959961, "logps/chosen": -1.6727781295776367, "logps/rejected": -291.38446044921875, "loss": 0.0599, "rewards/accuracies": 1.0, "rewards/chosen": 0.3009779453277588, "rewards/margins": 2.8644204139709473, "rewards/rejected": -2.5634422302246094, "step": 22930 }, { "epoch": 0.92, "learning_rate": 1.0270295872658265e-07, "logits/chosen": -3.019272804260254, "logits/rejected": -3.0497727394104004, "logps/chosen": -2.4269649982452393, "logps/rejected": -289.98114013671875, "loss": 0.0722, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29368215799331665, "rewards/margins": 2.8359434604644775, "rewards/rejected": -2.5422613620758057, "step": 22940 }, { "epoch": 0.92, "learning_rate": 1.0171506364985622e-07, "logits/chosen": -3.0301480293273926, "logits/rejected": -3.0578644275665283, "logps/chosen": -2.4191946983337402, "logps/rejected": -288.332763671875, "loss": 0.0766, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2920234799385071, "rewards/margins": 2.8203341960906982, "rewards/rejected": -2.528310537338257, "step": 22950 }, { "epoch": 0.92, "learning_rate": 1.0073184415233334e-07, "logits/chosen": -3.041109561920166, "logits/rejected": -3.0712082386016846, "logps/chosen": -0.16911455988883972, "logps/rejected": -291.9952087402344, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.3155081868171692, "rewards/margins": 2.879424810409546, "rewards/rejected": -2.5639166831970215, "step": 22960 }, { "epoch": 0.92, "learning_rate": 9.975330215085066e-08, "logits/chosen": -3.005127429962158, "logits/rejected": -3.0356221199035645, "logps/chosen": -3.552525043487549, "logps/rejected": -288.19586181640625, "loss": 0.0884, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2816142439842224, "rewards/margins": 2.81182599067688, "rewards/rejected": -2.5302116870880127, "step": 22970 }, { "epoch": 0.92, "learning_rate": 9.877943955312552e-08, "logits/chosen": -3.029170274734497, "logits/rejected": -3.0560250282287598, "logps/chosen": -3.8489108085632324, "logps/rejected": -285.90380859375, "loss": 0.096, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.277651846408844, "rewards/margins": 2.786273956298828, "rewards/rejected": -2.508622169494629, "step": 22980 }, { "epoch": 0.92, "learning_rate": 9.781025825775392e-08, "logits/chosen": -3.0115551948547363, "logits/rejected": -3.0409932136535645, "logps/chosen": -2.981943130493164, "logps/rejected": -287.2720642089844, "loss": 0.0839, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2845291197299957, "rewards/margins": 2.80531644821167, "rewards/rejected": -2.5207877159118652, "step": 22990 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -3.0156967639923096, "logits/rejected": -3.0449185371398926, "logps/chosen": -7.124350070953369, "logps/rejected": -283.8114318847656, "loss": 0.1252, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24289965629577637, "rewards/margins": 2.7302823066711426, "rewards/rejected": -2.487382411956787, "step": 23000 }, { "epoch": 0.92, "eval_logits/chosen": -3.0702741146087646, "eval_logits/rejected": -3.0956432819366455, "eval_logps/chosen": -0.1665695160627365, "eval_logps/rejected": -284.36602783203125, "eval_loss": 0.060309089720249176, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149338364601135, "eval_rewards/margins": 2.798438549041748, "eval_rewards/rejected": -2.4835045337677, "eval_runtime": 2.5345, "eval_samples_per_second": 1.973, "eval_steps_per_second": 0.395, "step": 23000 }, { "epoch": 0.92, "learning_rate": 9.588594712281185e-08, "logits/chosen": -3.017014741897583, "logits/rejected": -3.045954465866089, "logps/chosen": -1.8901395797729492, "logps/rejected": -290.9639892578125, "loss": 0.0588, "rewards/accuracies": 1.0, "rewards/chosen": 0.298106849193573, "rewards/margins": 2.8554601669311523, "rewards/rejected": -2.5573530197143555, "step": 23010 }, { "epoch": 0.92, "learning_rate": 9.493082103478519e-08, "logits/chosen": -3.0094165802001953, "logits/rejected": -3.041088819503784, "logps/chosen": -3.875527858734131, "logps/rejected": -287.30010986328125, "loss": 0.0918, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27701619267463684, "rewards/margins": 2.8010711669921875, "rewards/rejected": -2.524055004119873, "step": 23020 }, { "epoch": 0.92, "learning_rate": 9.398038375219048e-08, "logits/chosen": -2.9881253242492676, "logits/rejected": -3.014308214187622, "logps/chosen": -14.652200698852539, "logps/rejected": -272.0647888183594, "loss": 0.2, "rewards/accuracies": 0.9375, "rewards/chosen": 0.16889925301074982, "rewards/margins": 2.5380046367645264, "rewards/rejected": -2.369105339050293, "step": 23030 }, { "epoch": 0.92, "learning_rate": 9.303463712795308e-08, "logits/chosen": -3.0367815494537354, "logits/rejected": -3.0661821365356445, "logps/chosen": -1.9787155389785767, "logps/rejected": -290.2550048828125, "loss": 0.0609, "rewards/accuracies": 1.0, "rewards/chosen": 0.29652976989746094, "rewards/margins": 2.844179391860962, "rewards/rejected": -2.54764986038208, "step": 23040 }, { "epoch": 0.92, "learning_rate": 9.209358300585474e-08, "logits/chosen": -3.0058069229125977, "logits/rejected": -3.0352227687835693, "logps/chosen": -2.5035147666931152, "logps/rejected": -287.84814453125, "loss": 0.0761, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2930963635444641, "rewards/margins": 2.8165793418884277, "rewards/rejected": -2.5234830379486084, "step": 23050 }, { "epoch": 0.92, "learning_rate": 9.115722322052878e-08, "logits/chosen": -3.0013349056243896, "logits/rejected": -3.0337860584259033, "logps/chosen": -3.8682079315185547, "logps/rejected": -286.09112548828125, "loss": 0.0932, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27585428953170776, "rewards/margins": 2.7844977378845215, "rewards/rejected": -2.508643627166748, "step": 23060 }, { "epoch": 0.92, "learning_rate": 9.022555959745604e-08, "logits/chosen": -3.0197620391845703, "logits/rejected": -3.0488696098327637, "logps/chosen": -8.462364196777344, "logps/rejected": -281.5255126953125, "loss": 0.125, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.22954249382019043, "rewards/margins": 2.7003164291381836, "rewards/rejected": -2.470773696899414, "step": 23070 }, { "epoch": 0.92, "learning_rate": 8.929859395296365e-08, "logits/chosen": -3.0099196434020996, "logits/rejected": -3.0413124561309814, "logps/chosen": -0.25650277733802795, "logps/rejected": -289.2255859375, "loss": 0.0567, "rewards/accuracies": 1.0, "rewards/chosen": 0.31443530321121216, "rewards/margins": 2.8527438640594482, "rewards/rejected": -2.5383083820343018, "step": 23080 }, { "epoch": 0.92, "learning_rate": 8.837632809421681e-08, "logits/chosen": -3.0232949256896973, "logits/rejected": -3.0530800819396973, "logps/chosen": -3.12538480758667, "logps/rejected": -289.41363525390625, "loss": 0.0841, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.284773588180542, "rewards/margins": 2.8251988887786865, "rewards/rejected": -2.5404255390167236, "step": 23090 }, { "epoch": 0.92, "learning_rate": 8.745876381922147e-08, "logits/chosen": -2.998906135559082, "logits/rejected": -3.0255751609802246, "logps/chosen": -14.849409103393555, "logps/rejected": -272.06939697265625, "loss": 0.2019, "rewards/accuracies": 0.9375, "rewards/chosen": 0.16914670169353485, "rewards/margins": 2.5369009971618652, "rewards/rejected": -2.3677544593811035, "step": 23100 }, { "epoch": 0.92, "eval_logits/chosen": -3.071352005004883, "eval_logits/rejected": -3.096193552017212, "eval_logps/chosen": -0.19367049634456635, "eval_logps/rejected": -284.29315185546875, "eval_loss": 0.06039261817932129, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3146628141403198, "eval_rewards/margins": 2.797438383102417, "eval_rewards/rejected": -2.4827756881713867, "eval_runtime": 2.5423, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 23100 }, { "epoch": 0.92, "learning_rate": 8.654590291681531e-08, "logits/chosen": -3.0451316833496094, "logits/rejected": -3.072139263153076, "logps/chosen": -0.27428311109542847, "logps/rejected": -290.52447509765625, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3138781785964966, "rewards/margins": 2.864424228668213, "rewards/rejected": -2.550546169281006, "step": 23110 }, { "epoch": 0.92, "learning_rate": 8.563774716666779e-08, "logits/chosen": -3.0085442066192627, "logits/rejected": -3.0388553142547607, "logps/chosen": -0.1956210732460022, "logps/rejected": -291.12152099609375, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.315712034702301, "rewards/margins": 2.8723866939544678, "rewards/rejected": -2.5566747188568115, "step": 23120 }, { "epoch": 0.93, "learning_rate": 8.473429833927488e-08, "logits/chosen": -3.000231981277466, "logits/rejected": -3.0325827598571777, "logps/chosen": -0.221364825963974, "logps/rejected": -291.291259765625, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31610676646232605, "rewards/margins": 2.872274160385132, "rewards/rejected": -2.5561676025390625, "step": 23130 }, { "epoch": 0.93, "learning_rate": 8.383555819595601e-08, "logits/chosen": -3.020601272583008, "logits/rejected": -3.051182985305786, "logps/chosen": -0.1966492235660553, "logps/rejected": -291.98529052734375, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.3151650130748749, "rewards/margins": 2.8812882900238037, "rewards/rejected": -2.5661234855651855, "step": 23140 }, { "epoch": 0.93, "learning_rate": 8.294152848885156e-08, "logits/chosen": -3.041090726852417, "logits/rejected": -3.068662405014038, "logps/chosen": -10.246706008911133, "logps/rejected": -280.76263427734375, "loss": 0.1554, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21416978538036346, "rewards/margins": 2.6694228649139404, "rewards/rejected": -2.4552531242370605, "step": 23150 }, { "epoch": 0.93, "learning_rate": 8.205221096091787e-08, "logits/chosen": -3.012775421142578, "logits/rejected": -3.0442569255828857, "logps/chosen": -1.4360743761062622, "logps/rejected": -285.38482666015625, "loss": 0.0695, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3014332354068756, "rewards/margins": 2.800518751144409, "rewards/rejected": -2.4990856647491455, "step": 23160 }, { "epoch": 0.93, "learning_rate": 8.116760734592527e-08, "logits/chosen": -3.0125057697296143, "logits/rejected": -3.0407299995422363, "logps/chosen": -3.8139686584472656, "logps/rejected": -287.0626525878906, "loss": 0.0914, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27619075775146484, "rewards/margins": 2.7934341430664062, "rewards/rejected": -2.5172438621520996, "step": 23170 }, { "epoch": 0.93, "learning_rate": 8.028771936845343e-08, "logits/chosen": -3.006627321243286, "logits/rejected": -3.036668300628662, "logps/chosen": -3.8760623931884766, "logps/rejected": -289.342041015625, "loss": 0.0901, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27834945917129517, "rewards/margins": 2.8238742351531982, "rewards/rejected": -2.545525074005127, "step": 23180 }, { "epoch": 0.93, "learning_rate": 7.941254874388904e-08, "logits/chosen": -3.0212550163269043, "logits/rejected": -3.048081874847412, "logps/chosen": -6.516329288482666, "logps/rejected": -282.3587646484375, "loss": 0.1231, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.25263556838035583, "rewards/margins": 2.723583459854126, "rewards/rejected": -2.470947504043579, "step": 23190 }, { "epoch": 0.93, "learning_rate": 7.854209717842231e-08, "logits/chosen": -3.012183904647827, "logits/rejected": -3.0420632362365723, "logps/chosen": -2.98211669921875, "logps/rejected": -282.53167724609375, "loss": 0.0905, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.28928762674331665, "rewards/margins": 2.751723051071167, "rewards/rejected": -2.4624359607696533, "step": 23200 }, { "epoch": 0.93, "eval_logits/chosen": -3.071277141571045, "eval_logits/rejected": -3.096550703048706, "eval_logps/chosen": -0.1704237163066864, "eval_logps/rejected": -284.49481201171875, "eval_loss": 0.06025145202875137, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31489530205726624, "eval_rewards/margins": 2.7996878623962402, "eval_rewards/rejected": -2.484792709350586, "eval_runtime": 2.5354, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 23200 }, { "epoch": 0.93, "learning_rate": 7.767636636904274e-08, "logits/chosen": -3.01371693611145, "logits/rejected": -3.039921522140503, "logps/chosen": -7.00634765625, "logps/rejected": -282.5209655761719, "loss": 0.1238, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24625182151794434, "rewards/margins": 2.719392776489258, "rewards/rejected": -2.4731407165527344, "step": 23210 }, { "epoch": 0.93, "learning_rate": 7.681535800353717e-08, "logits/chosen": -2.9875638484954834, "logits/rejected": -3.0188088417053223, "logps/chosen": -0.30196526646614075, "logps/rejected": -289.1764221191406, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.31163591146469116, "rewards/margins": 2.8528058528900146, "rewards/rejected": -2.541170120239258, "step": 23220 }, { "epoch": 0.93, "learning_rate": 7.595907376048512e-08, "logits/chosen": -3.0200271606445312, "logits/rejected": -3.0487751960754395, "logps/chosen": -5.985312461853027, "logps/rejected": -282.18658447265625, "loss": 0.1138, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25853684544563293, "rewards/margins": 2.7269256114959717, "rewards/rejected": -2.468388319015503, "step": 23230 }, { "epoch": 0.93, "learning_rate": 7.510751530925675e-08, "logits/chosen": -3.011378288269043, "logits/rejected": -3.0406272411346436, "logps/chosen": -6.723675727844238, "logps/rejected": -284.83087158203125, "loss": 0.1207, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2485291063785553, "rewards/margins": 2.7438666820526123, "rewards/rejected": -2.495337963104248, "step": 23240 }, { "epoch": 0.93, "learning_rate": 7.426068431000883e-08, "logits/chosen": -2.9905447959899902, "logits/rejected": -3.022408962249756, "logps/chosen": -0.1908080130815506, "logps/rejected": -290.1520080566406, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.31498318910598755, "rewards/margins": 2.859938383102417, "rewards/rejected": -2.5449554920196533, "step": 23250 }, { "epoch": 0.93, "learning_rate": 7.341858241368182e-08, "logits/chosen": -3.030778169631958, "logits/rejected": -3.0572030544281006, "logps/chosen": -9.119039535522461, "logps/rejected": -280.4582824707031, "loss": 0.1459, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22632715106010437, "rewards/margins": 2.676298141479492, "rewards/rejected": -2.4499707221984863, "step": 23260 }, { "epoch": 0.93, "learning_rate": 7.258121126199614e-08, "logits/chosen": -3.0136539936065674, "logits/rejected": -3.044630289077759, "logps/chosen": -0.3782525658607483, "logps/rejected": -288.8850402832031, "loss": 0.0573, "rewards/accuracies": 1.0, "rewards/chosen": 0.31381145119667053, "rewards/margins": 2.8460142612457275, "rewards/rejected": -2.532202959060669, "step": 23270 }, { "epoch": 0.93, "learning_rate": 7.174857248745004e-08, "logits/chosen": -3.0109899044036865, "logits/rejected": -3.0440170764923096, "logps/chosen": -0.18004289269447327, "logps/rejected": -290.78106689453125, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3153030276298523, "rewards/margins": 2.8653945922851562, "rewards/rejected": -2.5500919818878174, "step": 23280 }, { "epoch": 0.93, "learning_rate": 7.092066771331507e-08, "logits/chosen": -3.0440680980682373, "logits/rejected": -3.0732455253601074, "logps/chosen": -0.2119254618883133, "logps/rejected": -291.4216613769531, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31413474678993225, "rewards/margins": 2.8744089603424072, "rewards/rejected": -2.560274362564087, "step": 23290 }, { "epoch": 0.93, "learning_rate": 7.009749855363457e-08, "logits/chosen": -3.027690887451172, "logits/rejected": -3.0554018020629883, "logps/chosen": -5.98476505279541, "logps/rejected": -284.8565673828125, "loss": 0.1121, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25554051995277405, "rewards/margins": 2.754642963409424, "rewards/rejected": -2.4991025924682617, "step": 23300 }, { "epoch": 0.93, "eval_logits/chosen": -3.0709500312805176, "eval_logits/rejected": -3.0955400466918945, "eval_logps/chosen": -0.16695435345172882, "eval_logps/rejected": -284.5164794921875, "eval_loss": 0.06023123115301132, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149299919605255, "eval_rewards/margins": 2.799938678741455, "eval_rewards/rejected": -2.485008716583252, "eval_runtime": 2.5419, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 23300 }, { "epoch": 0.93, "learning_rate": 6.927906661321815e-08, "logits/chosen": -2.99202036857605, "logits/rejected": -3.022650718688965, "logps/chosen": -3.631866455078125, "logps/rejected": -286.8922424316406, "loss": 0.0906, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27958276867866516, "rewards/margins": 2.7969326972961426, "rewards/rejected": -2.517350435256958, "step": 23310 }, { "epoch": 0.93, "learning_rate": 6.846537348764116e-08, "logits/chosen": -3.012507677078247, "logits/rejected": -3.040789842605591, "logps/chosen": -3.787322521209717, "logps/rejected": -289.08770751953125, "loss": 0.0901, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.280433714389801, "rewards/margins": 2.8162832260131836, "rewards/rejected": -2.5358498096466064, "step": 23320 }, { "epoch": 0.93, "learning_rate": 6.765642076323992e-08, "logits/chosen": -3.0153908729553223, "logits/rejected": -3.0423362255096436, "logps/chosen": -9.101587295532227, "logps/rejected": -280.9149475097656, "loss": 0.1449, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22471782565116882, "rewards/margins": 2.6831250190734863, "rewards/rejected": -2.458407163619995, "step": 23330 }, { "epoch": 0.93, "learning_rate": 6.685221001710957e-08, "logits/chosen": -3.005058765411377, "logits/rejected": -3.0353496074676514, "logps/chosen": -3.2591347694396973, "logps/rejected": -283.9668884277344, "loss": 0.0905, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2818176746368408, "rewards/margins": 2.777228832244873, "rewards/rejected": -2.495410919189453, "step": 23340 }, { "epoch": 0.93, "learning_rate": 6.605274281709929e-08, "logits/chosen": -3.028595447540283, "logits/rejected": -3.059267520904541, "logps/chosen": -0.17623743414878845, "logps/rejected": -291.789794921875, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.31683725118637085, "rewards/margins": 2.8792102336883545, "rewards/rejected": -2.562372922897339, "step": 23350 }, { "epoch": 0.93, "learning_rate": 6.525802072181204e-08, "logits/chosen": -3.010333299636841, "logits/rejected": -3.0413575172424316, "logps/chosen": -6.734272003173828, "logps/rejected": -282.7337951660156, "loss": 0.1219, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2484370470046997, "rewards/margins": 2.7216382026672363, "rewards/rejected": -2.473201274871826, "step": 23360 }, { "epoch": 0.93, "learning_rate": 6.446804528059874e-08, "logits/chosen": -3.00950288772583, "logits/rejected": -3.0380756855010986, "logps/chosen": -0.2528027892112732, "logps/rejected": -291.5843811035156, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3134761452674866, "rewards/margins": 2.875542402267456, "rewards/rejected": -2.5620663166046143, "step": 23370 }, { "epoch": 0.94, "learning_rate": 6.368281803355692e-08, "logits/chosen": -3.0094738006591797, "logits/rejected": -3.04034686088562, "logps/chosen": -2.4063222408294678, "logps/rejected": -285.5948791503906, "loss": 0.0777, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2921940088272095, "rewards/margins": 2.794438600540161, "rewards/rejected": -2.502244710922241, "step": 23380 }, { "epoch": 0.94, "learning_rate": 6.29023405115281e-08, "logits/chosen": -3.012416124343872, "logits/rejected": -3.0411808490753174, "logps/chosen": -6.885917663574219, "logps/rejected": -282.49615478515625, "loss": 0.1225, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24851150810718536, "rewards/margins": 2.719728946685791, "rewards/rejected": -2.471217393875122, "step": 23390 }, { "epoch": 0.94, "learning_rate": 6.212661423609184e-08, "logits/chosen": -3.0040700435638428, "logits/rejected": -3.0309460163116455, "logps/chosen": -0.2105502337217331, "logps/rejected": -290.29290771484375, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.31248602271080017, "rewards/margins": 2.8632545471191406, "rewards/rejected": -2.5507683753967285, "step": 23400 }, { "epoch": 0.94, "eval_logits/chosen": -3.071168899536133, "eval_logits/rejected": -3.0968017578125, "eval_logps/chosen": -0.16695508360862732, "eval_logps/rejected": -284.4164123535156, "eval_loss": 0.06029810756444931, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149299621582031, "eval_rewards/margins": 2.798938274383545, "eval_rewards/rejected": -2.484008312225342, "eval_runtime": 2.5349, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 23400 }, { "epoch": 0.94, "learning_rate": 6.135564071956729e-08, "logits/chosen": -3.030527353286743, "logits/rejected": -3.0574653148651123, "logps/chosen": -1.8541009426116943, "logps/rejected": -289.33612060546875, "loss": 0.0602, "rewards/accuracies": 1.0, "rewards/chosen": 0.29969853162765503, "rewards/margins": 2.834155559539795, "rewards/rejected": -2.534457206726074, "step": 23410 }, { "epoch": 0.94, "learning_rate": 6.05894214650063e-08, "logits/chosen": -3.0421082973480225, "logits/rejected": -3.0669658184051514, "logps/chosen": -3.096249580383301, "logps/rejected": -288.04736328125, "loss": 0.085, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2878749370574951, "rewards/margins": 2.811619758605957, "rewards/rejected": -2.523745059967041, "step": 23420 }, { "epoch": 0.94, "learning_rate": 5.982795796619256e-08, "logits/chosen": -3.015763759613037, "logits/rejected": -3.0451560020446777, "logps/chosen": -0.19404307007789612, "logps/rejected": -287.19354248046875, "loss": 0.0579, "rewards/accuracies": 1.0, "rewards/chosen": 0.3121371865272522, "rewards/margins": 2.832995891571045, "rewards/rejected": -2.5208585262298584, "step": 23430 }, { "epoch": 0.94, "learning_rate": 5.9071251707638056e-08, "logits/chosen": -3.0007822513580322, "logits/rejected": -3.0299293994903564, "logps/chosen": -3.4861743450164795, "logps/rejected": -287.0700988769531, "loss": 0.0887, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28263798356056213, "rewards/margins": 2.7953755855560303, "rewards/rejected": -2.512737989425659, "step": 23440 }, { "epoch": 0.94, "learning_rate": 5.83193041645802e-08, "logits/chosen": -3.0060997009277344, "logits/rejected": -3.0357894897460938, "logps/chosen": -11.674018859863281, "logps/rejected": -278.8095703125, "loss": 0.1569, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20177344977855682, "rewards/margins": 2.6362407207489014, "rewards/rejected": -2.434467315673828, "step": 23450 }, { "epoch": 0.94, "learning_rate": 5.7572116802979695e-08, "logits/chosen": -3.01945424079895, "logits/rejected": -3.050351619720459, "logps/chosen": -0.17028920352458954, "logps/rejected": -291.06982421875, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31555861234664917, "rewards/margins": 2.8743350505828857, "rewards/rejected": -2.558776617050171, "step": 23460 }, { "epoch": 0.94, "learning_rate": 5.68296910795163e-08, "logits/chosen": -3.0381789207458496, "logits/rejected": -3.066404104232788, "logps/chosen": -7.433203220367432, "logps/rejected": -284.06396484375, "loss": 0.127, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24322715401649475, "rewards/margins": 2.731215000152588, "rewards/rejected": -2.487987518310547, "step": 23470 }, { "epoch": 0.94, "learning_rate": 5.609202844158723e-08, "logits/chosen": -3.0187249183654785, "logits/rejected": -3.0449633598327637, "logps/chosen": -3.896209239959717, "logps/rejected": -282.64544677734375, "loss": 0.0937, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2773863673210144, "rewards/margins": 2.7548699378967285, "rewards/rejected": -2.4774832725524902, "step": 23480 }, { "epoch": 0.94, "learning_rate": 5.535913032730295e-08, "logits/chosen": -3.0129470825195312, "logits/rejected": -3.040513515472412, "logps/chosen": -8.544660568237305, "logps/rejected": -279.0264587402344, "loss": 0.1363, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23126360774040222, "rewards/margins": 2.671325445175171, "rewards/rejected": -2.4400620460510254, "step": 23490 }, { "epoch": 0.94, "learning_rate": 5.463099816548578e-08, "logits/chosen": -3.009509563446045, "logits/rejected": -3.0421693325042725, "logps/chosen": -0.19111748039722443, "logps/rejected": -290.82012939453125, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31381145119667053, "rewards/margins": 2.8705966472625732, "rewards/rejected": -2.5567855834960938, "step": 23500 }, { "epoch": 0.94, "eval_logits/chosen": -3.070258617401123, "eval_logits/rejected": -3.0957586765289307, "eval_logps/chosen": -0.18260376155376434, "eval_logps/rejected": -284.4820556640625, "eval_loss": 0.060272276401519775, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147734999656677, "eval_rewards/margins": 2.7994384765625, "eval_rewards/rejected": -2.4846649169921875, "eval_runtime": 2.5395, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 23500 }, { "epoch": 0.94, "learning_rate": 5.390763337566746e-08, "logits/chosen": -3.027244806289673, "logits/rejected": -3.0578408241271973, "logps/chosen": -4.031628608703613, "logps/rejected": -285.6437072753906, "loss": 0.0933, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2776208817958832, "rewards/margins": 2.7761831283569336, "rewards/rejected": -2.4985623359680176, "step": 23510 }, { "epoch": 0.94, "learning_rate": 5.318903736808406e-08, "logits/chosen": -3.0094826221466064, "logits/rejected": -3.038278102874756, "logps/chosen": -2.461322784423828, "logps/rejected": -288.6359558105469, "loss": 0.0718, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2886492609977722, "rewards/margins": 2.8256161212921143, "rewards/rejected": -2.5369668006896973, "step": 23520 }, { "epoch": 0.94, "learning_rate": 5.247521154367552e-08, "logits/chosen": -2.9773712158203125, "logits/rejected": -3.010289430618286, "logps/chosen": -0.19207237660884857, "logps/rejected": -292.2181701660156, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.3137403130531311, "rewards/margins": 2.8806381225585938, "rewards/rejected": -2.5668978691101074, "step": 23530 }, { "epoch": 0.94, "learning_rate": 5.176615729408169e-08, "logits/chosen": -3.0487091541290283, "logits/rejected": -3.0766923427581787, "logps/chosen": -0.22616741061210632, "logps/rejected": -290.3964538574219, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.3159666359424591, "rewards/margins": 2.8649544715881348, "rewards/rejected": -2.548987627029419, "step": 23540 }, { "epoch": 0.94, "learning_rate": 5.106187600163987e-08, "logits/chosen": -3.030538320541382, "logits/rejected": -3.0606331825256348, "logps/chosen": -7.502226829528809, "logps/rejected": -285.1378173828125, "loss": 0.1272, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2434951364994049, "rewards/margins": 2.741987943649292, "rewards/rejected": -2.498493194580078, "step": 23550 }, { "epoch": 0.94, "learning_rate": 5.036236903938285e-08, "logits/chosen": -3.0365424156188965, "logits/rejected": -3.0627570152282715, "logps/chosen": -6.611250400543213, "logps/rejected": -284.01153564453125, "loss": 0.1133, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25174954533576965, "rewards/margins": 2.739555835723877, "rewards/rejected": -2.4878063201904297, "step": 23560 }, { "epoch": 0.94, "learning_rate": 4.966763777103506e-08, "logits/chosen": -3.014406204223633, "logits/rejected": -3.043139934539795, "logps/chosen": -2.8868300914764404, "logps/rejected": -286.66192626953125, "loss": 0.0819, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.287659615278244, "rewards/margins": 2.8011841773986816, "rewards/rejected": -2.5135245323181152, "step": 23570 }, { "epoch": 0.94, "learning_rate": 4.8977683551010846e-08, "logits/chosen": -3.0283212661743164, "logits/rejected": -3.0566089153289795, "logps/chosen": -4.587279319763184, "logps/rejected": -284.71533203125, "loss": 0.1006, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26985207200050354, "rewards/margins": 2.768221855163574, "rewards/rejected": -2.4983696937561035, "step": 23580 }, { "epoch": 0.94, "learning_rate": 4.829250772441091e-08, "logits/chosen": -3.0125250816345215, "logits/rejected": -3.0419931411743164, "logps/chosen": -2.8510971069335938, "logps/rejected": -287.15057373046875, "loss": 0.0829, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2890329957008362, "rewards/margins": 2.8059849739074707, "rewards/rejected": -2.5169520378112793, "step": 23590 }, { "epoch": 0.94, "learning_rate": 4.761211162702117e-08, "logits/chosen": -3.0385825634002686, "logits/rejected": -3.0648088455200195, "logps/chosen": -3.952608823776245, "logps/rejected": -285.7478332519531, "loss": 0.093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2773330807685852, "rewards/margins": 2.783569097518921, "rewards/rejected": -2.5062365531921387, "step": 23600 }, { "epoch": 0.94, "eval_logits/chosen": -3.070643663406372, "eval_logits/rejected": -3.0959322452545166, "eval_logps/chosen": -0.18574514985084534, "eval_logps/rejected": -284.5601806640625, "eval_loss": 0.060225676745176315, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147420883178711, "eval_rewards/margins": 2.800187826156616, "eval_rewards/rejected": -2.485445737838745, "eval_runtime": 2.5503, "eval_samples_per_second": 1.961, "eval_steps_per_second": 0.392, "step": 23600 }, { "epoch": 0.94, "learning_rate": 4.6936496585308075e-08, "logits/chosen": -3.003721237182617, "logits/rejected": -3.0369133949279785, "logps/chosen": -0.198044091463089, "logps/rejected": -290.0942687988281, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.3140285611152649, "rewards/margins": 2.859891414642334, "rewards/rejected": -2.545862913131714, "step": 23610 }, { "epoch": 0.94, "learning_rate": 4.626566391641774e-08, "logits/chosen": -3.016326665878296, "logits/rejected": -3.0442280769348145, "logps/chosen": -3.8397669792175293, "logps/rejected": -286.2727966308594, "loss": 0.0926, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27819353342056274, "rewards/margins": 2.785418748855591, "rewards/rejected": -2.507225275039673, "step": 23620 }, { "epoch": 0.95, "learning_rate": 4.5599614928173166e-08, "logits/chosen": -3.018341064453125, "logits/rejected": -3.0481607913970947, "logps/chosen": -5.512994289398193, "logps/rejected": -284.9250793457031, "loss": 0.1076, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2627502977848053, "rewards/margins": 2.7577431201934814, "rewards/rejected": -2.49499249458313, "step": 23630 }, { "epoch": 0.95, "learning_rate": 4.493835091907067e-08, "logits/chosen": -3.0275368690490723, "logits/rejected": -3.0565712451934814, "logps/chosen": -2.9350311756134033, "logps/rejected": -288.035400390625, "loss": 0.0833, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28895753622055054, "rewards/margins": 2.8159918785095215, "rewards/rejected": -2.527034282684326, "step": 23640 }, { "epoch": 0.95, "learning_rate": 4.428187317827848e-08, "logits/chosen": -3.008392095565796, "logits/rejected": -3.0390498638153076, "logps/chosen": -3.306417942047119, "logps/rejected": -285.41375732421875, "loss": 0.0885, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28288397192955017, "rewards/margins": 2.782162666320801, "rewards/rejected": -2.4992785453796387, "step": 23650 }, { "epoch": 0.95, "learning_rate": 4.3630182985633093e-08, "logits/chosen": -3.017932176589966, "logits/rejected": -3.050443172454834, "logps/chosen": -6.827781677246094, "logps/rejected": -285.01318359375, "loss": 0.1205, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24791860580444336, "rewards/margins": 2.747138738632202, "rewards/rejected": -2.4992198944091797, "step": 23660 }, { "epoch": 0.95, "learning_rate": 4.2983281611638225e-08, "logits/chosen": -3.0202293395996094, "logits/rejected": -3.0511815547943115, "logps/chosen": -0.15623262524604797, "logps/rejected": -292.49798583984375, "loss": 0.0544, "rewards/accuracies": 1.0, "rewards/chosen": 0.31589382886886597, "rewards/margins": 2.88749623298645, "rewards/rejected": -2.5716023445129395, "step": 23670 }, { "epoch": 0.95, "learning_rate": 4.2341170317461433e-08, "logits/chosen": -3.0093746185302734, "logits/rejected": -3.0387043952941895, "logps/chosen": -2.4206175804138184, "logps/rejected": -286.3841857910156, "loss": 0.0775, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29302555322647095, "rewards/margins": 2.803101062774658, "rewards/rejected": -2.510075330734253, "step": 23680 }, { "epoch": 0.95, "learning_rate": 4.170385035493108e-08, "logits/chosen": -2.997973918914795, "logits/rejected": -3.0293190479278564, "logps/chosen": -2.4124226570129395, "logps/rejected": -287.16778564453125, "loss": 0.0767, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28908246755599976, "rewards/margins": 2.8088717460632324, "rewards/rejected": -2.519789218902588, "step": 23690 }, { "epoch": 0.95, "learning_rate": 4.1071322966535487e-08, "logits/chosen": -2.9931416511535645, "logits/rejected": -3.023808002471924, "logps/chosen": -0.21779994666576385, "logps/rejected": -290.0079040527344, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.3135024905204773, "rewards/margins": 2.861931085586548, "rewards/rejected": -2.548428535461426, "step": 23700 }, { "epoch": 0.95, "eval_logits/chosen": -3.069990396499634, "eval_logits/rejected": -3.0959038734436035, "eval_logps/chosen": -0.17214924097061157, "eval_logps/rejected": -284.3966369628906, "eval_loss": 0.06028766557574272, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31487804651260376, "eval_rewards/margins": 2.7986884117126465, "eval_rewards/rejected": -2.4838104248046875, "eval_runtime": 2.5401, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 23700 }, { "epoch": 0.95, "learning_rate": 4.044358938541853e-08, "logits/chosen": -3.0188193321228027, "logits/rejected": -3.0498433113098145, "logps/chosen": -0.1940622627735138, "logps/rejected": -291.19207763671875, "loss": 0.0555, "rewards/accuracies": 1.0, "rewards/chosen": 0.31554025411605835, "rewards/margins": 2.8695785999298096, "rewards/rejected": -2.5540385246276855, "step": 23710 }, { "epoch": 0.95, "learning_rate": 3.982065083537962e-08, "logits/chosen": -3.0177035331726074, "logits/rejected": -3.0463204383850098, "logps/chosen": -0.17813582718372345, "logps/rejected": -293.0651550292969, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": 0.31587082147598267, "rewards/margins": 2.8927690982818604, "rewards/rejected": -2.5768978595733643, "step": 23720 }, { "epoch": 0.95, "learning_rate": 3.920250853086893e-08, "logits/chosen": -3.0381407737731934, "logits/rejected": -3.0645909309387207, "logps/chosen": -6.244898796081543, "logps/rejected": -286.8886413574219, "loss": 0.0981, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.25728926062583923, "rewards/margins": 2.76621413230896, "rewards/rejected": -2.508924961090088, "step": 23730 }, { "epoch": 0.95, "learning_rate": 3.8589163676986674e-08, "logits/chosen": -3.0394339561462402, "logits/rejected": -3.0686304569244385, "logps/chosen": -0.3009334206581116, "logps/rejected": -291.862060546875, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.31495437026023865, "rewards/margins": 2.878657341003418, "rewards/rejected": -2.5637030601501465, "step": 23740 }, { "epoch": 0.95, "learning_rate": 3.798061746947995e-08, "logits/chosen": -3.0071616172790527, "logits/rejected": -3.039520740509033, "logps/chosen": -0.14657747745513916, "logps/rejected": -290.5335388183594, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.31571370363235474, "rewards/margins": 2.861889362335205, "rewards/rejected": -2.546175718307495, "step": 23750 }, { "epoch": 0.95, "learning_rate": 3.737687109474059e-08, "logits/chosen": -3.014721393585205, "logits/rejected": -3.044046401977539, "logps/chosen": -0.5933729410171509, "logps/rejected": -285.255126953125, "loss": 0.0653, "rewards/accuracies": 1.0, "rewards/chosen": 0.3105855882167816, "rewards/margins": 2.811654567718506, "rewards/rejected": -2.5010690689086914, "step": 23760 }, { "epoch": 0.95, "learning_rate": 3.677792572980371e-08, "logits/chosen": -3.0458531379699707, "logits/rejected": -3.071864366531372, "logps/chosen": -0.17370279133319855, "logps/rejected": -290.80120849609375, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.31641775369644165, "rewards/margins": 2.865406036376953, "rewards/rejected": -2.5489883422851562, "step": 23770 }, { "epoch": 0.95, "learning_rate": 3.618378254234306e-08, "logits/chosen": -3.0173559188842773, "logits/rejected": -3.0476653575897217, "logps/chosen": -6.24556303024292, "logps/rejected": -284.1162109375, "loss": 0.1162, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2552174925804138, "rewards/margins": 2.7411022186279297, "rewards/rejected": -2.485884666442871, "step": 23780 }, { "epoch": 0.95, "learning_rate": 3.5594442690671806e-08, "logits/chosen": -3.0153768062591553, "logits/rejected": -3.0473217964172363, "logps/chosen": -1.5639674663543701, "logps/rejected": -289.28265380859375, "loss": 0.0626, "rewards/accuracies": 1.0, "rewards/chosen": 0.30048996210098267, "rewards/margins": 2.839726686477661, "rewards/rejected": -2.5392367839813232, "step": 23790 }, { "epoch": 0.95, "learning_rate": 3.5009907323737826e-08, "logits/chosen": -3.026024103164673, "logits/rejected": -3.055072069168091, "logps/chosen": -0.3059697449207306, "logps/rejected": -289.26861572265625, "loss": 0.0576, "rewards/accuracies": 1.0, "rewards/chosen": 0.3154938220977783, "rewards/margins": 2.8502840995788574, "rewards/rejected": -2.534790277481079, "step": 23800 }, { "epoch": 0.95, "eval_logits/chosen": -3.0703442096710205, "eval_logits/rejected": -3.096557855606079, "eval_logps/chosen": -0.1914995312690735, "eval_logps/rejected": -284.49090576171875, "eval_loss": 0.060272444039583206, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31468454003334045, "eval_rewards/margins": 2.7994377613067627, "eval_rewards/rejected": -2.484753131866455, "eval_runtime": 2.5349, "eval_samples_per_second": 1.972, "eval_steps_per_second": 0.394, "step": 23800 }, { "epoch": 0.95, "learning_rate": 3.44301775811226e-08, "logits/chosen": -3.0238189697265625, "logits/rejected": -3.0535900592803955, "logps/chosen": -4.595786094665527, "logps/rejected": -282.61248779296875, "loss": 0.103, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26944699883461, "rewards/margins": 2.7417352199554443, "rewards/rejected": -2.472288131713867, "step": 23810 }, { "epoch": 0.95, "learning_rate": 3.3855254593039566e-08, "logits/chosen": -3.0134313106536865, "logits/rejected": -3.043207883834839, "logps/chosen": -11.051773071289062, "logps/rejected": -274.3720397949219, "loss": 0.1668, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20821671187877655, "rewards/margins": 2.596916675567627, "rewards/rejected": -2.388700008392334, "step": 23820 }, { "epoch": 0.95, "learning_rate": 3.328513948032991e-08, "logits/chosen": -3.026421070098877, "logits/rejected": -3.055530071258545, "logps/chosen": -0.24076032638549805, "logps/rejected": -289.5401916503906, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.3134598731994629, "rewards/margins": 2.8512446880340576, "rewards/rejected": -2.537785053253174, "step": 23830 }, { "epoch": 0.95, "learning_rate": 3.2719833354462325e-08, "logits/chosen": -3.024740695953369, "logits/rejected": -3.053652286529541, "logps/chosen": -6.65875244140625, "logps/rejected": -285.0677795410156, "loss": 0.1199, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2532484233379364, "rewards/margins": 2.7471320629119873, "rewards/rejected": -2.4938836097717285, "step": 23840 }, { "epoch": 0.95, "learning_rate": 3.2159337317530234e-08, "logits/chosen": -3.01863431930542, "logits/rejected": -3.046877145767212, "logps/chosen": -0.6101809740066528, "logps/rejected": -290.2419738769531, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": 0.31101781129837036, "rewards/margins": 2.8594651222229004, "rewards/rejected": -2.5484471321105957, "step": 23850 }, { "epoch": 0.95, "learning_rate": 3.1603652462249e-08, "logits/chosen": -3.0385379791259766, "logits/rejected": -3.0689210891723633, "logps/chosen": -0.3140861392021179, "logps/rejected": -291.35894775390625, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.31046730279922485, "rewards/margins": 2.8733859062194824, "rewards/rejected": -2.5629184246063232, "step": 23860 }, { "epoch": 0.95, "learning_rate": 3.1052779871955376e-08, "logits/chosen": -3.023805618286133, "logits/rejected": -3.0514469146728516, "logps/chosen": -0.25301748514175415, "logps/rejected": -290.4915466308594, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.31491968035697937, "rewards/margins": 2.8683130741119385, "rewards/rejected": -2.5533928871154785, "step": 23870 }, { "epoch": 0.96, "learning_rate": 3.050672062060278e-08, "logits/chosen": -3.0318281650543213, "logits/rejected": -3.060433864593506, "logps/chosen": -6.753883361816406, "logps/rejected": -280.25927734375, "loss": 0.1237, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24940796196460724, "rewards/margins": 2.6998696327209473, "rewards/rejected": -2.4504618644714355, "step": 23880 }, { "epoch": 0.96, "learning_rate": 2.9965475772762154e-08, "logits/chosen": -2.9997589588165283, "logits/rejected": -3.033475875854492, "logps/chosen": -0.37956634163856506, "logps/rejected": -290.5104064941406, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.31062230467796326, "rewards/margins": 2.8637492656707764, "rewards/rejected": -2.5531272888183594, "step": 23890 }, { "epoch": 0.96, "learning_rate": 2.9429046383618042e-08, "logits/chosen": -3.0029406547546387, "logits/rejected": -3.033812999725342, "logps/chosen": -7.2001519203186035, "logps/rejected": -283.521484375, "loss": 0.1255, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24355268478393555, "rewards/margins": 2.7268869876861572, "rewards/rejected": -2.4833340644836426, "step": 23900 }, { "epoch": 0.96, "eval_logits/chosen": -3.0707147121429443, "eval_logits/rejected": -3.096421003341675, "eval_logps/chosen": -0.19396977126598358, "eval_logps/rejected": -284.443359375, "eval_loss": 0.060298480093479156, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31465983390808105, "eval_rewards/margins": 2.7989375591278076, "eval_rewards/rejected": -2.4842782020568848, "eval_runtime": 2.5392, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 23900 }, { "epoch": 0.96, "learning_rate": 2.8897433498966678e-08, "logits/chosen": -3.0111024379730225, "logits/rejected": -3.0431861877441406, "logps/chosen": -0.1677643060684204, "logps/rejected": -290.61419677734375, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.31609898805618286, "rewards/margins": 2.867112159729004, "rewards/rejected": -2.551013231277466, "step": 23910 }, { "epoch": 0.96, "learning_rate": 2.8370638155215125e-08, "logits/chosen": -3.0130021572113037, "logits/rejected": -3.0401182174682617, "logps/chosen": -2.590177059173584, "logps/rejected": -287.64483642578125, "loss": 0.0792, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29207876324653625, "rewards/margins": 2.8110034465789795, "rewards/rejected": -2.5189244747161865, "step": 23920 }, { "epoch": 0.96, "learning_rate": 2.784866137937714e-08, "logits/chosen": -3.030900478363037, "logits/rejected": -3.0582587718963623, "logps/chosen": -0.2156890332698822, "logps/rejected": -292.46685791015625, "loss": 0.0546, "rewards/accuracies": 1.0, "rewards/chosen": 0.3141142725944519, "rewards/margins": 2.884934186935425, "rewards/rejected": -2.570819854736328, "step": 23930 }, { "epoch": 0.96, "learning_rate": 2.7331504189073987e-08, "logits/chosen": -3.017167568206787, "logits/rejected": -3.0456738471984863, "logps/chosen": -2.0266342163085938, "logps/rejected": -286.0074462890625, "loss": 0.0735, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29361793398857117, "rewards/margins": 2.8034071922302246, "rewards/rejected": -2.509788990020752, "step": 23940 }, { "epoch": 0.96, "learning_rate": 2.681916759252917e-08, "logits/chosen": -3.0317881107330322, "logits/rejected": -3.0600662231445312, "logps/chosen": -0.2257104367017746, "logps/rejected": -290.195556640625, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.3181437849998474, "rewards/margins": 2.858765125274658, "rewards/rejected": -2.540621519088745, "step": 23950 }, { "epoch": 0.96, "learning_rate": 2.6311652588569826e-08, "logits/chosen": -3.032709836959839, "logits/rejected": -3.059504270553589, "logps/chosen": -2.930908441543579, "logps/rejected": -286.9700927734375, "loss": 0.0821, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28693994879722595, "rewards/margins": 2.8030505180358887, "rewards/rejected": -2.516110897064209, "step": 23960 }, { "epoch": 0.96, "learning_rate": 2.580896016662199e-08, "logits/chosen": -3.001800537109375, "logits/rejected": -3.032026767730713, "logps/chosen": -3.2663180828094482, "logps/rejected": -288.33306884765625, "loss": 0.0855, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2826565206050873, "rewards/margins": 2.8126559257507324, "rewards/rejected": -2.529999256134033, "step": 23970 }, { "epoch": 0.96, "learning_rate": 2.531109130671061e-08, "logits/chosen": -3.0428831577301025, "logits/rejected": -3.067709445953369, "logps/chosen": -0.1899869740009308, "logps/rejected": -288.98162841796875, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.31585755944252014, "rewards/margins": 2.8510031700134277, "rewards/rejected": -2.5351457595825195, "step": 23980 }, { "epoch": 0.96, "learning_rate": 2.48180469794565e-08, "logits/chosen": -3.0335209369659424, "logits/rejected": -3.0644683837890625, "logps/chosen": -0.18103250861167908, "logps/rejected": -290.4665222167969, "loss": 0.0559, "rewards/accuracies": 1.0, "rewards/chosen": 0.31692665815353394, "rewards/margins": 2.8639588356018066, "rewards/rejected": -2.5470328330993652, "step": 23990 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -3.012629747390747, "logits/rejected": -3.0429139137268066, "logps/chosen": -3.930060625076294, "logps/rejected": -283.5278625488281, "loss": 0.094, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2723408639431, "rewards/margins": 2.7600278854370117, "rewards/rejected": -2.4876868724823, "step": 24000 }, { "epoch": 0.96, "eval_logits/chosen": -3.0700876712799072, "eval_logits/rejected": -3.096083402633667, "eval_logps/chosen": -0.19231677055358887, "eval_logps/rejected": -284.4167785644531, "eval_loss": 0.060334790498018265, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3146763741970062, "eval_rewards/margins": 2.7986881732940674, "eval_rewards/rejected": -2.4840118885040283, "eval_runtime": 2.5461, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.393, "step": 24000 }, { "epoch": 0.96, "learning_rate": 2.3846435758372034e-08, "logits/chosen": -3.0515618324279785, "logits/rejected": -3.0778915882110596, "logps/chosen": -7.0014238357543945, "logps/rejected": -281.4954528808594, "loss": 0.1242, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24634146690368652, "rewards/margins": 2.7078349590301514, "rewards/rejected": -2.461493492126465, "step": 24010 }, { "epoch": 0.96, "learning_rate": 2.3367870758747857e-08, "logits/chosen": -3.0299735069274902, "logits/rejected": -3.058058500289917, "logps/chosen": -3.8590445518493652, "logps/rejected": -287.55548095703125, "loss": 0.0917, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2775806188583374, "rewards/margins": 2.797689437866211, "rewards/rejected": -2.520108938217163, "step": 24020 }, { "epoch": 0.96, "learning_rate": 2.289413408018859e-08, "logits/chosen": -3.0276119709014893, "logits/rejected": -3.0561602115631104, "logps/chosen": -4.960816383361816, "logps/rejected": -282.50311279296875, "loss": 0.1059, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2656046748161316, "rewards/margins": 2.73994517326355, "rewards/rejected": -2.4743409156799316, "step": 24030 }, { "epoch": 0.96, "learning_rate": 2.242522664626823e-08, "logits/chosen": -3.008155584335327, "logits/rejected": -3.0388054847717285, "logps/chosen": -4.471499919891357, "logps/rejected": -284.2335205078125, "loss": 0.0979, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2716630697250366, "rewards/margins": 2.765077590942383, "rewards/rejected": -2.493414878845215, "step": 24040 }, { "epoch": 0.96, "learning_rate": 2.1961149371145795e-08, "logits/chosen": -3.025585651397705, "logits/rejected": -3.0569491386413574, "logps/chosen": -0.1659257709980011, "logps/rejected": -291.0560607910156, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3155616819858551, "rewards/margins": 2.869629383087158, "rewards/rejected": -2.554067611694336, "step": 24050 }, { "epoch": 0.96, "learning_rate": 2.1501903159563688e-08, "logits/chosen": -3.019181251525879, "logits/rejected": -3.0494914054870605, "logps/chosen": -5.727991104125977, "logps/rejected": -284.1846618652344, "loss": 0.1102, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2594570517539978, "rewards/margins": 2.7473440170288086, "rewards/rejected": -2.487886905670166, "step": 24060 }, { "epoch": 0.96, "learning_rate": 2.1047488906845715e-08, "logits/chosen": -3.0237338542938232, "logits/rejected": -3.051020622253418, "logps/chosen": -7.48496150970459, "logps/rejected": -283.8203125, "loss": 0.1276, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24207615852355957, "rewards/margins": 2.7270290851593018, "rewards/rejected": -2.484952688217163, "step": 24070 }, { "epoch": 0.96, "learning_rate": 2.0597907498896007e-08, "logits/chosen": -2.9917681217193604, "logits/rejected": -3.023019313812256, "logps/chosen": -3.7419421672821045, "logps/rejected": -287.9491271972656, "loss": 0.0904, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2799729108810425, "rewards/margins": 2.8067994117736816, "rewards/rejected": -2.5268263816833496, "step": 24080 }, { "epoch": 0.96, "learning_rate": 2.015315981219651e-08, "logits/chosen": -3.0277152061462402, "logits/rejected": -3.055359125137329, "logps/chosen": -0.1541227400302887, "logps/rejected": -293.13031005859375, "loss": 0.0541, "rewards/accuracies": 1.0, "rewards/chosen": 0.3162393569946289, "rewards/margins": 2.894028425216675, "rewards/rejected": -2.577789306640625, "step": 24090 }, { "epoch": 0.96, "learning_rate": 1.9713246713805588e-08, "logits/chosen": -3.0002212524414062, "logits/rejected": -3.032461643218994, "logps/chosen": -0.2330995500087738, "logps/rejected": -292.90142822265625, "loss": 0.0543, "rewards/accuracies": 1.0, "rewards/chosen": 0.3137085735797882, "rewards/margins": 2.8891217708587646, "rewards/rejected": -2.575413227081299, "step": 24100 }, { "epoch": 0.96, "eval_logits/chosen": -3.07096791267395, "eval_logits/rejected": -3.0963008403778076, "eval_logps/chosen": -0.18172328174114227, "eval_logps/rejected": -284.4561462402344, "eval_loss": 0.060277946293354034, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147823214530945, "eval_rewards/margins": 2.7991881370544434, "eval_rewards/rejected": -2.484405994415283, "eval_runtime": 2.5464, "eval_samples_per_second": 1.964, "eval_steps_per_second": 0.393, "step": 24100 }, { "epoch": 0.96, "learning_rate": 1.9278169061355824e-08, "logits/chosen": -3.029594659805298, "logits/rejected": -3.059987783432007, "logps/chosen": -0.18768854439258575, "logps/rejected": -291.0887145996094, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.3145006000995636, "rewards/margins": 2.8725435733795166, "rewards/rejected": -2.5580430030822754, "step": 24110 }, { "epoch": 0.96, "learning_rate": 1.8847927703053993e-08, "logits/chosen": -2.990328311920166, "logits/rejected": -3.02136492729187, "logps/chosen": -10.416816711425781, "logps/rejected": -281.78436279296875, "loss": 0.1566, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2119264155626297, "rewards/margins": 2.6786327362060547, "rewards/rejected": -2.466705799102783, "step": 24120 }, { "epoch": 0.97, "learning_rate": 1.842252347767748e-08, "logits/chosen": -3.043701648712158, "logits/rejected": -3.070528507232666, "logps/chosen": -0.5155161619186401, "logps/rejected": -285.11029052734375, "loss": 0.0656, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.31288760900497437, "rewards/margins": 2.8074119091033936, "rewards/rejected": -2.4945242404937744, "step": 24130 }, { "epoch": 0.97, "learning_rate": 1.8001957214573706e-08, "logits/chosen": -3.0089457035064697, "logits/rejected": -3.038281202316284, "logps/chosen": -9.863495826721191, "logps/rejected": -277.43829345703125, "loss": 0.149, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.21934890747070312, "rewards/margins": 2.637516498565674, "rewards/rejected": -2.41816782951355, "step": 24140 }, { "epoch": 0.97, "learning_rate": 1.7586229733657646e-08, "logits/chosen": -2.991212844848633, "logits/rejected": -3.0196595191955566, "logps/chosen": -3.847632884979248, "logps/rejected": -286.01593017578125, "loss": 0.0919, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2775954008102417, "rewards/margins": 2.7878429889678955, "rewards/rejected": -2.510247230529785, "step": 24150 }, { "epoch": 0.97, "learning_rate": 1.717534184541153e-08, "logits/chosen": -3.028139591217041, "logits/rejected": -3.0564188957214355, "logps/chosen": -0.1980186551809311, "logps/rejected": -291.3858947753906, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.31686386466026306, "rewards/margins": 2.872545003890991, "rewards/rejected": -2.5556812286376953, "step": 24160 }, { "epoch": 0.97, "learning_rate": 1.6769294350882648e-08, "logits/chosen": -3.037003755569458, "logits/rejected": -3.0665459632873535, "logps/chosen": -5.883091926574707, "logps/rejected": -286.5858154296875, "loss": 0.1065, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26106733083724976, "rewards/margins": 2.766326427459717, "rewards/rejected": -2.5052590370178223, "step": 24170 }, { "epoch": 0.97, "learning_rate": 1.6368088041681108e-08, "logits/chosen": -3.017857789993286, "logits/rejected": -3.0450310707092285, "logps/chosen": -10.375732421875, "logps/rejected": -276.71868896484375, "loss": 0.16, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21354737877845764, "rewards/margins": 2.6279537677764893, "rewards/rejected": -2.4144065380096436, "step": 24180 }, { "epoch": 0.97, "learning_rate": 1.5971723699979015e-08, "logits/chosen": -3.032365083694458, "logits/rejected": -3.0588715076446533, "logps/chosen": -6.922816276550293, "logps/rejected": -282.31689453125, "loss": 0.1242, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24781134724617004, "rewards/margins": 2.718527317047119, "rewards/rejected": -2.4707159996032715, "step": 24190 }, { "epoch": 0.97, "learning_rate": 1.5580202098509078e-08, "logits/chosen": -3.004486560821533, "logits/rejected": -3.0362343788146973, "logps/chosen": -0.21736708283424377, "logps/rejected": -291.9169006347656, "loss": 0.0551, "rewards/accuracies": 1.0, "rewards/chosen": 0.31570810079574585, "rewards/margins": 2.876342535018921, "rewards/rejected": -2.5606346130371094, "step": 24200 }, { "epoch": 0.97, "eval_logits/chosen": -3.0702764987945557, "eval_logits/rejected": -3.0958616733551025, "eval_logps/chosen": -0.18278095126152039, "eval_logps/rejected": -284.45721435546875, "eval_loss": 0.06027877330780029, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31477171182632446, "eval_rewards/margins": 2.7991881370544434, "eval_rewards/rejected": -2.4844162464141846, "eval_runtime": 2.54, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 24200 }, { "epoch": 0.97, "learning_rate": 1.5193524000562675e-08, "logits/chosen": -3.0311131477355957, "logits/rejected": -3.058567523956299, "logps/chosen": -7.112244606018066, "logps/rejected": -280.00701904296875, "loss": 0.1268, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24537964165210724, "rewards/margins": 2.692171096801758, "rewards/rejected": -2.446791410446167, "step": 24210 }, { "epoch": 0.97, "learning_rate": 1.4811690159988456e-08, "logits/chosen": -3.0391733646392822, "logits/rejected": -3.0645031929016113, "logps/chosen": -12.932577133178711, "logps/rejected": -278.80584716796875, "loss": 0.1652, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.18574902415275574, "rewards/margins": 2.6266632080078125, "rewards/rejected": -2.4409141540527344, "step": 24220 }, { "epoch": 0.97, "learning_rate": 1.4434701321191236e-08, "logits/chosen": -3.0409703254699707, "logits/rejected": -3.0682132244110107, "logps/chosen": -0.20229701697826385, "logps/rejected": -289.84564208984375, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.3161543905735016, "rewards/margins": 2.859160900115967, "rewards/rejected": -2.543006181716919, "step": 24230 }, { "epoch": 0.97, "learning_rate": 1.4062558219130052e-08, "logits/chosen": -3.020634174346924, "logits/rejected": -3.0441389083862305, "logps/chosen": -7.597140312194824, "logps/rejected": -281.4622497558594, "loss": 0.1333, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24420881271362305, "rewards/margins": 2.7016072273254395, "rewards/rejected": -2.4573986530303955, "step": 24240 }, { "epoch": 0.97, "learning_rate": 1.3695261579316776e-08, "logits/chosen": -3.030100107192993, "logits/rejected": -3.054352045059204, "logps/chosen": -6.130160331726074, "logps/rejected": -283.243408203125, "loss": 0.1155, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2558888792991638, "rewards/margins": 2.7346537113189697, "rewards/rejected": -2.478764772415161, "step": 24250 }, { "epoch": 0.97, "learning_rate": 1.3332812117814731e-08, "logits/chosen": -3.008725881576538, "logits/rejected": -3.0379364490509033, "logps/chosen": -3.972182035446167, "logps/rejected": -286.58984375, "loss": 0.0925, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27544787526130676, "rewards/margins": 2.7895545959472656, "rewards/rejected": -2.5141069889068604, "step": 24260 }, { "epoch": 0.97, "learning_rate": 1.2975210541238403e-08, "logits/chosen": -3.014378070831299, "logits/rejected": -3.043705463409424, "logps/chosen": -5.804856300354004, "logps/rejected": -282.3077087402344, "loss": 0.108, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2576860189437866, "rewards/margins": 2.730886936187744, "rewards/rejected": -2.473200798034668, "step": 24270 }, { "epoch": 0.97, "learning_rate": 1.2622457546749567e-08, "logits/chosen": -3.024972915649414, "logits/rejected": -3.052522659301758, "logps/chosen": -6.692194938659668, "logps/rejected": -282.1401672363281, "loss": 0.122, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24963326752185822, "rewards/margins": 2.7175559997558594, "rewards/rejected": -2.4679226875305176, "step": 24280 }, { "epoch": 0.97, "learning_rate": 1.2274553822058944e-08, "logits/chosen": -3.0087459087371826, "logits/rejected": -3.0393612384796143, "logps/chosen": -3.7301433086395264, "logps/rejected": -288.29998779296875, "loss": 0.0904, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28194400668144226, "rewards/margins": 2.8082056045532227, "rewards/rejected": -2.526261806488037, "step": 24290 }, { "epoch": 0.97, "learning_rate": 1.193150004542204e-08, "logits/chosen": -3.0332908630371094, "logits/rejected": -3.0617759227752686, "logps/chosen": -6.187159061431885, "logps/rejected": -283.59295654296875, "loss": 0.1155, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.2548387348651886, "rewards/margins": 2.738236904144287, "rewards/rejected": -2.4833984375, "step": 24300 }, { "epoch": 0.97, "eval_logits/chosen": -3.071225881576538, "eval_logits/rejected": -3.0970869064331055, "eval_logps/chosen": -0.16518676280975342, "eval_logps/rejected": -284.4146728515625, "eval_loss": 0.06029857322573662, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3149476647377014, "eval_rewards/margins": 2.798938512802124, "eval_rewards/rejected": -2.4839909076690674, "eval_runtime": 2.5407, "eval_samples_per_second": 1.968, "eval_steps_per_second": 0.394, "step": 24300 }, { "epoch": 0.97, "learning_rate": 1.1593296885640259e-08, "logits/chosen": -3.01824688911438, "logits/rejected": -3.045703411102295, "logps/chosen": -9.625073432922363, "logps/rejected": -278.08453369140625, "loss": 0.149, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22160100936889648, "rewards/margins": 2.6524059772491455, "rewards/rejected": -2.430804967880249, "step": 24310 }, { "epoch": 0.97, "learning_rate": 1.125994500205757e-08, "logits/chosen": -3.033712148666382, "logits/rejected": -3.062299966812134, "logps/chosen": -7.067903995513916, "logps/rejected": -281.67510986328125, "loss": 0.1243, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24711230397224426, "rewards/margins": 2.707221508026123, "rewards/rejected": -2.4601094722747803, "step": 24320 }, { "epoch": 0.97, "learning_rate": 1.0931445044560784e-08, "logits/chosen": -3.009427070617676, "logits/rejected": -3.041133165359497, "logps/chosen": -2.320488452911377, "logps/rejected": -287.45819091796875, "loss": 0.0647, "rewards/accuracies": 1.0, "rewards/chosen": 0.29370665550231934, "rewards/margins": 2.8148655891418457, "rewards/rejected": -2.5211589336395264, "step": 24330 }, { "epoch": 0.97, "learning_rate": 1.0607797653577333e-08, "logits/chosen": -3.040856122970581, "logits/rejected": -3.070539951324463, "logps/chosen": -0.2183413803577423, "logps/rejected": -290.33343505859375, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.31189680099487305, "rewards/margins": 2.866222381591797, "rewards/rejected": -2.554325580596924, "step": 24340 }, { "epoch": 0.97, "learning_rate": 1.0289003460074165e-08, "logits/chosen": -3.0032782554626465, "logits/rejected": -3.029379367828369, "logps/chosen": -9.976242065429688, "logps/rejected": -278.94677734375, "loss": 0.1543, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2165077179670334, "rewards/margins": 2.6564624309539795, "rewards/rejected": -2.4399547576904297, "step": 24350 }, { "epoch": 0.97, "learning_rate": 9.97506308555718e-09, "logits/chosen": -3.025629758834839, "logits/rejected": -3.055903196334839, "logps/chosen": -3.8694961071014404, "logps/rejected": -288.33642578125, "loss": 0.091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2787528932094574, "rewards/margins": 2.808987617492676, "rewards/rejected": -2.5302348136901855, "step": 24360 }, { "epoch": 0.97, "learning_rate": 9.665977142068738e-09, "logits/chosen": -3.012220621109009, "logits/rejected": -3.041438579559326, "logps/chosen": -6.0847954750061035, "logps/rejected": -283.7989807128906, "loss": 0.1129, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25360286235809326, "rewards/margins": 2.7436182498931885, "rewards/rejected": -2.4900150299072266, "step": 24370 }, { "epoch": 0.98, "learning_rate": 9.361746232188496e-09, "logits/chosen": -3.020205020904541, "logits/rejected": -3.0505576133728027, "logps/chosen": -0.1634991317987442, "logps/rejected": -292.36383056640625, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": 0.3147975504398346, "rewards/margins": 2.8861303329467773, "rewards/rejected": -2.5713329315185547, "step": 24380 }, { "epoch": 0.98, "learning_rate": 9.062370949029231e-09, "logits/chosen": -3.0157647132873535, "logits/rejected": -3.0463244915008545, "logps/chosen": -2.5483322143554688, "logps/rejected": -288.40924072265625, "loss": 0.0774, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.29157930612564087, "rewards/margins": 2.8193976879119873, "rewards/rejected": -2.5278186798095703, "step": 24390 }, { "epoch": 0.98, "learning_rate": 8.767851876239075e-09, "logits/chosen": -3.005762815475464, "logits/rejected": -3.036912202835083, "logps/chosen": -0.1690901815891266, "logps/rejected": -291.904541015625, "loss": 0.0549, "rewards/accuracies": 1.0, "rewards/chosen": 0.31663936376571655, "rewards/margins": 2.8806023597717285, "rewards/rejected": -2.563962697982788, "step": 24400 }, { "epoch": 0.98, "eval_logits/chosen": -3.070141077041626, "eval_logits/rejected": -3.0956385135650635, "eval_logps/chosen": -0.16539230942726135, "eval_logps/rejected": -284.23980712890625, "eval_loss": 0.060412902384996414, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31494560837745667, "eval_rewards/margins": 2.7971882820129395, "eval_rewards/rejected": -2.4822423458099365, "eval_runtime": 2.54, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 24400 }, { "epoch": 0.98, "learning_rate": 8.478189587997898e-09, "logits/chosen": -2.989332914352417, "logits/rejected": -3.023707628250122, "logps/chosen": -0.19294360280036926, "logps/rejected": -289.6800231933594, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.315995991230011, "rewards/margins": 2.8610854148864746, "rewards/rejected": -2.5450897216796875, "step": 24410 }, { "epoch": 0.98, "learning_rate": 8.193384649017033e-09, "logits/chosen": -3.0079691410064697, "logits/rejected": -3.0362675189971924, "logps/chosen": -5.880807399749756, "logps/rejected": -283.3115234375, "loss": 0.1127, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25872793793678284, "rewards/margins": 2.733865737915039, "rewards/rejected": -2.475137948989868, "step": 24420 }, { "epoch": 0.98, "learning_rate": 7.913437614538166e-09, "logits/chosen": -3.038883686065674, "logits/rejected": -3.0654072761535645, "logps/chosen": -3.5481178760528564, "logps/rejected": -287.5367431640625, "loss": 0.0891, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28187692165374756, "rewards/margins": 2.802769184112549, "rewards/rejected": -2.52089262008667, "step": 24430 }, { "epoch": 0.98, "learning_rate": 7.638349030332504e-09, "logits/chosen": -3.033822774887085, "logits/rejected": -3.0630404949188232, "logps/chosen": -6.904820442199707, "logps/rejected": -283.6285705566406, "loss": 0.1094, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.24824097752571106, "rewards/margins": 2.7311525344848633, "rewards/rejected": -2.4829115867614746, "step": 24440 }, { "epoch": 0.98, "learning_rate": 7.368119432699383e-09, "logits/chosen": -3.0016884803771973, "logits/rejected": -3.0328831672668457, "logps/chosen": -3.348400831222534, "logps/rejected": -286.23699951171875, "loss": 0.0876, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.280203640460968, "rewards/margins": 2.7943413257598877, "rewards/rejected": -2.5141375064849854, "step": 24450 }, { "epoch": 0.98, "learning_rate": 7.102749348465166e-09, "logits/chosen": -3.0213351249694824, "logits/rejected": -3.051905870437622, "logps/chosen": -0.3246050179004669, "logps/rejected": -290.34210205078125, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.31273922324180603, "rewards/margins": 2.8633720874786377, "rewards/rejected": -2.5506327152252197, "step": 24460 }, { "epoch": 0.98, "learning_rate": 6.8422392949824005e-09, "logits/chosen": -3.021573543548584, "logits/rejected": -3.0496959686279297, "logps/chosen": -3.75740122795105, "logps/rejected": -286.23504638671875, "loss": 0.0916, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27955299615859985, "rewards/margins": 2.790409564971924, "rewards/rejected": -2.5108566284179688, "step": 24470 }, { "epoch": 0.98, "learning_rate": 6.586589780128716e-09, "logits/chosen": -3.0450663566589355, "logits/rejected": -3.0722286701202393, "logps/chosen": -3.867155075073242, "logps/rejected": -288.4808654785156, "loss": 0.0906, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2781871557235718, "rewards/margins": 2.812818765640259, "rewards/rejected": -2.5346314907073975, "step": 24480 }, { "epoch": 0.98, "learning_rate": 6.3358013023062656e-09, "logits/chosen": -3.0125889778137207, "logits/rejected": -3.041591167449951, "logps/chosen": -10.50626277923584, "logps/rejected": -280.5230407714844, "loss": 0.1578, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.21187350153923035, "rewards/margins": 2.6670022010803223, "rewards/rejected": -2.4551289081573486, "step": 24490 }, { "epoch": 0.98, "learning_rate": 6.089874350439507e-09, "logits/chosen": -3.008206844329834, "logits/rejected": -3.0378220081329346, "logps/chosen": -0.20001789927482605, "logps/rejected": -290.90972900390625, "loss": 0.0556, "rewards/accuracies": 1.0, "rewards/chosen": 0.3165794014930725, "rewards/margins": 2.869096517562866, "rewards/rejected": -2.5525169372558594, "step": 24500 }, { "epoch": 0.98, "eval_logits/chosen": -3.0698800086975098, "eval_logits/rejected": -3.095959186553955, "eval_logps/chosen": -0.15687134861946106, "eval_logps/rejected": -284.55633544921875, "eval_loss": 0.060204457491636276, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150308132171631, "eval_rewards/margins": 2.800438404083252, "eval_rewards/rejected": -2.485407590866089, "eval_runtime": 2.5393, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 24500 }, { "epoch": 0.98, "learning_rate": 5.848809403975752e-09, "logits/chosen": -3.019421339035034, "logits/rejected": -3.0505871772766113, "logps/chosen": -0.189042329788208, "logps/rejected": -293.1136779785156, "loss": 0.054, "rewards/accuracies": 1.0, "rewards/chosen": 0.31270796060562134, "rewards/margins": 2.895188331604004, "rewards/rejected": -2.5824801921844482, "step": 24510 }, { "epoch": 0.98, "learning_rate": 5.612606932883513e-09, "logits/chosen": -3.0276503562927246, "logits/rejected": -3.0535523891448975, "logps/chosen": -10.726096153259277, "logps/rejected": -274.9632568359375, "loss": 0.1576, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.20915517210960388, "rewards/margins": 2.6037237644195557, "rewards/rejected": -2.39456844329834, "step": 24520 }, { "epoch": 0.98, "learning_rate": 5.381267397651935e-09, "logits/chosen": -3.0241456031799316, "logits/rejected": -3.054375648498535, "logps/chosen": -0.22589346766471863, "logps/rejected": -290.86358642578125, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.31306254863739014, "rewards/margins": 2.8727049827575684, "rewards/rejected": -2.5596423149108887, "step": 24530 }, { "epoch": 0.98, "learning_rate": 5.15479124928886e-09, "logits/chosen": -3.036520004272461, "logits/rejected": -3.0641493797302246, "logps/chosen": -6.46173095703125, "logps/rejected": -281.01495361328125, "loss": 0.1205, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25225144624710083, "rewards/margins": 2.7084038257598877, "rewards/rejected": -2.4561524391174316, "step": 24540 }, { "epoch": 0.98, "learning_rate": 4.933178929321103e-09, "logits/chosen": -3.0135133266448975, "logits/rejected": -3.045367479324341, "logps/chosen": -3.660465955734253, "logps/rejected": -289.0694580078125, "loss": 0.0888, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27808964252471924, "rewards/margins": 2.818444013595581, "rewards/rejected": -2.5403544902801514, "step": 24550 }, { "epoch": 0.98, "learning_rate": 4.7164308697933425e-09, "logits/chosen": -3.0086798667907715, "logits/rejected": -3.036935567855835, "logps/chosen": -3.794483184814453, "logps/rejected": -287.66119384765625, "loss": 0.0913, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2802538275718689, "rewards/margins": 2.795341968536377, "rewards/rejected": -2.5150880813598633, "step": 24560 }, { "epoch": 0.98, "learning_rate": 4.504547493267286e-09, "logits/chosen": -3.0032808780670166, "logits/rejected": -3.033142566680908, "logps/chosen": -3.0696346759796143, "logps/rejected": -288.0691833496094, "loss": 0.0845, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2862509787082672, "rewards/margins": 2.8145675659179688, "rewards/rejected": -2.5283169746398926, "step": 24570 }, { "epoch": 0.98, "learning_rate": 4.297529212820006e-09, "logits/chosen": -3.012538194656372, "logits/rejected": -3.041560173034668, "logps/chosen": -3.391829013824463, "logps/rejected": -286.13970947265625, "loss": 0.088, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2844642698764801, "rewards/margins": 2.7866883277893066, "rewards/rejected": -2.5022244453430176, "step": 24580 }, { "epoch": 0.98, "learning_rate": 4.095376432044218e-09, "logits/chosen": -3.0001511573791504, "logits/rejected": -3.0321013927459717, "logps/chosen": -0.16866037249565125, "logps/rejected": -289.786865234375, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": 0.31280219554901123, "rewards/margins": 2.8569586277008057, "rewards/rejected": -2.544156551361084, "step": 24590 }, { "epoch": 0.98, "learning_rate": 3.8980895450474455e-09, "logits/chosen": -3.0259780883789062, "logits/rejected": -3.0562615394592285, "logps/chosen": -1.9563184976577759, "logps/rejected": -286.6620178222656, "loss": 0.0693, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2949569821357727, "rewards/margins": 2.810814380645752, "rewards/rejected": -2.515857458114624, "step": 24600 }, { "epoch": 0.98, "eval_logits/chosen": -3.071052074432373, "eval_logits/rejected": -3.096306085586548, "eval_logps/chosen": -0.19074097275733948, "eval_logps/rejected": -284.3652038574219, "eval_loss": 0.060361482203006744, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3146921396255493, "eval_rewards/margins": 2.7981884479522705, "eval_rewards/rejected": -2.4834961891174316, "eval_runtime": 2.5536, "eval_samples_per_second": 1.958, "eval_steps_per_second": 0.392, "step": 24600 }, { "epoch": 0.98, "learning_rate": 3.7056689364503574e-09, "logits/chosen": -3.0271925926208496, "logits/rejected": -3.055426836013794, "logps/chosen": -0.3335789740085602, "logps/rejected": -290.7089538574219, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/chosen": 0.3140459656715393, "rewards/margins": 2.8630948066711426, "rewards/rejected": -2.549049139022827, "step": 24610 }, { "epoch": 0.98, "learning_rate": 3.518114981387044e-09, "logits/chosen": -3.016493558883667, "logits/rejected": -3.0480141639709473, "logps/chosen": -0.22168810665607452, "logps/rejected": -290.01641845703125, "loss": 0.0562, "rewards/accuracies": 1.0, "rewards/chosen": 0.31400471925735474, "rewards/margins": 2.8608412742614746, "rewards/rejected": -2.546837091445923, "step": 24620 }, { "epoch": 0.99, "learning_rate": 3.3354280455030753e-09, "logits/chosen": -3.010680675506592, "logits/rejected": -3.038423776626587, "logps/chosen": -4.3172221183776855, "logps/rejected": -283.1442565917969, "loss": 0.1007, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27497345209121704, "rewards/margins": 2.749600648880005, "rewards/rejected": -2.4746270179748535, "step": 24630 }, { "epoch": 0.99, "learning_rate": 3.1576084849563315e-09, "logits/chosen": -3.0082263946533203, "logits/rejected": -3.0380120277404785, "logps/chosen": -1.3709821701049805, "logps/rejected": -288.3421325683594, "loss": 0.0633, "rewards/accuracies": 1.0, "rewards/chosen": 0.30382996797561646, "rewards/margins": 2.8329100608825684, "rewards/rejected": -2.5290801525115967, "step": 24640 }, { "epoch": 0.99, "learning_rate": 2.984656646415063e-09, "logits/chosen": -3.0155768394470215, "logits/rejected": -3.0443482398986816, "logps/chosen": -3.561309337615967, "logps/rejected": -287.3155212402344, "loss": 0.0907, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2823999524116516, "rewards/margins": 2.8011088371276855, "rewards/rejected": -2.5187089443206787, "step": 24650 }, { "epoch": 0.99, "learning_rate": 2.8165728670573324e-09, "logits/chosen": -3.0131735801696777, "logits/rejected": -3.043957233428955, "logps/chosen": -0.2830604016780853, "logps/rejected": -289.2966003417969, "loss": 0.0568, "rewards/accuracies": 1.0, "rewards/chosen": 0.31344881653785706, "rewards/margins": 2.8521289825439453, "rewards/rejected": -2.538680076599121, "step": 24660 }, { "epoch": 0.99, "learning_rate": 2.6533574745718493e-09, "logits/chosen": -3.029114246368408, "logits/rejected": -3.058124542236328, "logps/chosen": -4.570045471191406, "logps/rejected": -284.9844970703125, "loss": 0.0966, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.26957201957702637, "rewards/margins": 2.7652337551116943, "rewards/rejected": -2.495661973953247, "step": 24670 }, { "epoch": 0.99, "learning_rate": 2.495010787154917e-09, "logits/chosen": -3.002002239227295, "logits/rejected": -3.033501148223877, "logps/chosen": -0.19483426213264465, "logps/rejected": -290.68084716796875, "loss": 0.0558, "rewards/accuracies": 1.0, "rewards/chosen": 0.3147077262401581, "rewards/margins": 2.8665213584899902, "rewards/rejected": -2.5518131256103516, "step": 24680 }, { "epoch": 0.99, "learning_rate": 2.3415331135115404e-09, "logits/chosen": -3.04321026802063, "logits/rejected": -3.0701653957366943, "logps/chosen": -3.283355712890625, "logps/rejected": -285.36260986328125, "loss": 0.0861, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28575339913368225, "rewards/margins": 2.7853569984436035, "rewards/rejected": -2.499603509902954, "step": 24690 }, { "epoch": 0.99, "learning_rate": 2.192924752854042e-09, "logits/chosen": -3.01523756980896, "logits/rejected": -3.0454952716827393, "logps/chosen": -3.8311564922332764, "logps/rejected": -287.52288818359375, "loss": 0.0919, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2761283218860626, "rewards/margins": 2.7964320182800293, "rewards/rejected": -2.520303964614868, "step": 24700 }, { "epoch": 0.99, "eval_logits/chosen": -3.0709755420684814, "eval_logits/rejected": -3.0962467193603516, "eval_logps/chosen": -0.18936842679977417, "eval_logps/rejected": -284.3638916015625, "eval_loss": 0.060361914336681366, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31470584869384766, "eval_rewards/margins": 2.7981886863708496, "eval_rewards/rejected": -2.483483076095581, "eval_runtime": 2.5367, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.394, "step": 24700 }, { "epoch": 0.99, "learning_rate": 2.0491859949026136e-09, "logits/chosen": -3.038762331008911, "logits/rejected": -3.0650038719177246, "logps/chosen": -6.165562629699707, "logps/rejected": -284.28936767578125, "loss": 0.1136, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25711873173713684, "rewards/margins": 2.7469322681427, "rewards/rejected": -2.4898135662078857, "step": 24710 }, { "epoch": 0.99, "learning_rate": 1.910317119882821e-09, "logits/chosen": -3.025937080383301, "logits/rejected": -3.054873466491699, "logps/chosen": -7.469228267669678, "logps/rejected": -283.995361328125, "loss": 0.1274, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24293556809425354, "rewards/margins": 2.728241443634033, "rewards/rejected": -2.4853057861328125, "step": 24720 }, { "epoch": 0.99, "learning_rate": 1.7763183985269882e-09, "logits/chosen": -3.0166609287261963, "logits/rejected": -3.046797513961792, "logps/chosen": -0.3172820210456848, "logps/rejected": -288.11016845703125, "loss": 0.0614, "rewards/accuracies": 1.0, "rewards/chosen": 0.31176677346229553, "rewards/margins": 2.8406007289886475, "rewards/rejected": -2.528834104537964, "step": 24730 }, { "epoch": 0.99, "learning_rate": 1.647190092071982e-09, "logits/chosen": -3.0092999935150146, "logits/rejected": -3.041449546813965, "logps/chosen": -7.5297088623046875, "logps/rejected": -284.2057189941406, "loss": 0.1274, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24460884928703308, "rewards/margins": 2.7328107357025146, "rewards/rejected": -2.48820161819458, "step": 24740 }, { "epoch": 0.99, "learning_rate": 1.5229324522605949e-09, "logits/chosen": -3.030717372894287, "logits/rejected": -3.0580432415008545, "logps/chosen": -0.2108333557844162, "logps/rejected": -292.31817626953125, "loss": 0.0548, "rewards/accuracies": 1.0, "rewards/chosen": 0.31503695249557495, "rewards/margins": 2.8810346126556396, "rewards/rejected": -2.56599760055542, "step": 24750 }, { "epoch": 0.99, "learning_rate": 1.4035457213393278e-09, "logits/chosen": -3.014569044113159, "logits/rejected": -3.0463106632232666, "logps/chosen": -0.22349724173545837, "logps/rejected": -290.112060546875, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.3154355585575104, "rewards/margins": 2.8585586547851562, "rewards/rejected": -2.543123245239258, "step": 24760 }, { "epoch": 0.99, "learning_rate": 1.2890301320583887e-09, "logits/chosen": -3.008496046066284, "logits/rejected": -3.0379157066345215, "logps/chosen": -3.2630927562713623, "logps/rejected": -288.3923645019531, "loss": 0.0856, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2828245460987091, "rewards/margins": 2.8157620429992676, "rewards/rejected": -2.532937526702881, "step": 24770 }, { "epoch": 0.99, "learning_rate": 1.179385907672248e-09, "logits/chosen": -3.020078182220459, "logits/rejected": -3.051640272140503, "logps/chosen": -3.1554338932037354, "logps/rejected": -287.3190002441406, "loss": 0.085, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28482356667518616, "rewards/margins": 2.8102481365203857, "rewards/rejected": -2.5254247188568115, "step": 24780 }, { "epoch": 0.99, "learning_rate": 1.0746132619374184e-09, "logits/chosen": -3.026099920272827, "logits/rejected": -3.055551052093506, "logps/chosen": -8.267836570739746, "logps/rejected": -278.7701721191406, "loss": 0.1369, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23466603457927704, "rewards/margins": 2.668747901916504, "rewards/rejected": -2.43408203125, "step": 24790 }, { "epoch": 0.99, "learning_rate": 9.747123991141193e-10, "logits/chosen": -3.0155961513519287, "logits/rejected": -3.044996738433838, "logps/chosen": -3.840686082839966, "logps/rejected": -286.28631591796875, "loss": 0.0924, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27842551469802856, "rewards/margins": 2.787309408187866, "rewards/rejected": -2.5088839530944824, "step": 24800 }, { "epoch": 0.99, "eval_logits/chosen": -3.0707178115844727, "eval_logits/rejected": -3.096261978149414, "eval_logps/chosen": -0.18034979701042175, "eval_logps/rejected": -284.354736328125, "eval_loss": 0.06036118417978287, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3147960305213928, "eval_rewards/margins": 2.798187732696533, "eval_rewards/rejected": -2.483391761779785, "eval_runtime": 2.5417, "eval_samples_per_second": 1.967, "eval_steps_per_second": 0.393, "step": 24800 }, { "epoch": 0.99, "learning_rate": 8.796835139637805e-10, "logits/chosen": -3.033536911010742, "logits/rejected": -3.0602128505706787, "logps/chosen": -9.314623832702637, "logps/rejected": -280.17120361328125, "loss": 0.147, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.22418665885925293, "rewards/margins": 2.673352003097534, "rewards/rejected": -2.4491653442382812, "step": 24810 }, { "epoch": 0.99, "learning_rate": 7.895267917501503e-10, "logits/chosen": -3.0181479454040527, "logits/rejected": -3.049238920211792, "logps/chosen": -2.8749983310699463, "logps/rejected": -287.9979553222656, "loss": 0.0813, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2864297032356262, "rewards/margins": 2.8120617866516113, "rewards/rejected": -2.52563214302063, "step": 24820 }, { "epoch": 0.99, "learning_rate": 7.042424082381871e-10, "logits/chosen": -3.013343572616577, "logits/rejected": -3.0452892780303955, "logps/chosen": -3.928760528564453, "logps/rejected": -282.98516845703125, "loss": 0.0962, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27453410625457764, "rewards/margins": 2.7545406818389893, "rewards/rejected": -2.480006694793701, "step": 24830 }, { "epoch": 0.99, "learning_rate": 6.238305296946135e-10, "logits/chosen": -3.0007715225219727, "logits/rejected": -3.030129909515381, "logps/chosen": -0.20545120537281036, "logps/rejected": -291.1752624511719, "loss": 0.0553, "rewards/accuracies": 1.0, "rewards/chosen": 0.3144310414791107, "rewards/margins": 2.8737878799438477, "rewards/rejected": -2.559356689453125, "step": 24840 }, { "epoch": 0.99, "learning_rate": 5.48291312886251e-10, "logits/chosen": -3.018866539001465, "logits/rejected": -3.0460453033447266, "logps/chosen": -3.599155902862549, "logps/rejected": -287.6173400878906, "loss": 0.0896, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2819989323616028, "rewards/margins": 2.8011374473571777, "rewards/rejected": -2.5191385746002197, "step": 24850 }, { "epoch": 0.99, "learning_rate": 4.77624905080576e-10, "logits/chosen": -3.02131986618042, "logits/rejected": -3.0532021522521973, "logps/chosen": -2.1883013248443604, "logps/rejected": -287.5423278808594, "loss": 0.0741, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2948647439479828, "rewards/margins": 2.813955545425415, "rewards/rejected": -2.5190906524658203, "step": 24860 }, { "epoch": 0.99, "learning_rate": 4.1183144404571827e-10, "logits/chosen": -3.0180583000183105, "logits/rejected": -3.0492770671844482, "logps/chosen": -0.1994672417640686, "logps/rejected": -291.7662658691406, "loss": 0.0552, "rewards/accuracies": 1.0, "rewards/chosen": 0.3163365125656128, "rewards/margins": 2.875988721847534, "rewards/rejected": -2.559652328491211, "step": 24870 }, { "epoch": 1.0, "learning_rate": 3.509110580490749e-10, "logits/chosen": -3.0228047370910645, "logits/rejected": -3.0543053150177, "logps/chosen": -0.20584002137184143, "logps/rejected": -289.62860107421875, "loss": 0.0563, "rewards/accuracies": 1.0, "rewards/chosen": 0.3119796812534332, "rewards/margins": 2.859710931777954, "rewards/rejected": -2.5477311611175537, "step": 24880 }, { "epoch": 1.0, "learning_rate": 2.9486386585786395e-10, "logits/chosen": -3.0135016441345215, "logits/rejected": -3.0446245670318604, "logps/chosen": -0.20960617065429688, "logps/rejected": -290.2677917480469, "loss": 0.0561, "rewards/accuracies": 1.0, "rewards/chosen": 0.317573219537735, "rewards/margins": 2.8619954586029053, "rewards/rejected": -2.5444223880767822, "step": 24890 }, { "epoch": 1.0, "learning_rate": 2.43689976739403e-10, "logits/chosen": -3.0203018188476562, "logits/rejected": -3.0493016242980957, "logps/chosen": -0.27259624004364014, "logps/rejected": -288.28997802734375, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": 0.3136906325817108, "rewards/margins": 2.841336727142334, "rewards/rejected": -2.527646064758301, "step": 24900 }, { "epoch": 1.0, "eval_logits/chosen": -3.071237087249756, "eval_logits/rejected": -3.097256660461426, "eval_logps/chosen": -0.20153093338012695, "eval_logps/rejected": -284.42596435546875, "eval_loss": 0.06030454486608505, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.31458422541618347, "eval_rewards/margins": 2.7986879348754883, "eval_rewards/rejected": -2.4841036796569824, "eval_runtime": 2.538, "eval_samples_per_second": 1.97, "eval_steps_per_second": 0.394, "step": 24900 }, { "epoch": 1.0, "learning_rate": 1.9738949045972068e-10, "logits/chosen": -3.03068208694458, "logits/rejected": -3.0592963695526123, "logps/chosen": -6.509430885314941, "logps/rejected": -283.18865966796875, "loss": 0.12, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24976953864097595, "rewards/margins": 2.7294259071350098, "rewards/rejected": -2.479656457901001, "step": 24910 }, { "epoch": 1.0, "learning_rate": 1.5596249728383473e-10, "logits/chosen": -3.0316367149353027, "logits/rejected": -3.0613961219787598, "logps/chosen": -1.804356336593628, "logps/rejected": -288.2460021972656, "loss": 0.073, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.3005014657974243, "rewards/margins": 2.8233401775360107, "rewards/rejected": -2.522839069366455, "step": 24920 }, { "epoch": 1.0, "learning_rate": 1.1940907797575175e-10, "logits/chosen": -3.0271477699279785, "logits/rejected": -3.057917833328247, "logps/chosen": -3.8781192302703857, "logps/rejected": -285.3290100097656, "loss": 0.0937, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.27732494473457336, "rewards/margins": 2.77378511428833, "rewards/rejected": -2.496460199356079, "step": 24930 }, { "epoch": 1.0, "learning_rate": 8.772930379846723e-11, "logits/chosen": -3.0504353046417236, "logits/rejected": -3.0754547119140625, "logps/chosen": -6.948870658874512, "logps/rejected": -283.76556396484375, "loss": 0.1222, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25121966004371643, "rewards/margins": 2.734142541885376, "rewards/rejected": -2.4829230308532715, "step": 24940 }, { "epoch": 1.0, "learning_rate": 6.092323651313293e-11, "logits/chosen": -3.035039186477661, "logits/rejected": -3.059028148651123, "logps/chosen": -8.155508041381836, "logps/rejected": -280.165771484375, "loss": 0.1335, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.23548361659049988, "rewards/margins": 2.683298110961914, "rewards/rejected": -2.447814464569092, "step": 24950 }, { "epoch": 1.0, "learning_rate": 3.899092837933438e-11, "logits/chosen": -3.049076557159424, "logits/rejected": -3.0769550800323486, "logps/chosen": -0.228045254945755, "logps/rejected": -291.6459045410156, "loss": 0.0554, "rewards/accuracies": 1.0, "rewards/chosen": 0.31369349360466003, "rewards/margins": 2.8748767375946045, "rewards/rejected": -2.561182975769043, "step": 24960 }, { "epoch": 1.0, "learning_rate": 2.1932422155923618e-11, "logits/chosen": -3.025184154510498, "logits/rejected": -3.0540833473205566, "logps/chosen": -7.051326751708984, "logps/rejected": -283.4236755371094, "loss": 0.1235, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.24811092019081116, "rewards/margins": 2.7264037132263184, "rewards/rejected": -2.478292942047119, "step": 24970 }, { "epoch": 1.0, "learning_rate": 9.747751098521107e-12, "logits/chosen": -3.0071215629577637, "logits/rejected": -3.0389745235443115, "logps/chosen": -0.2545509934425354, "logps/rejected": -292.2900390625, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": 0.31492769718170166, "rewards/margins": 2.888066530227661, "rewards/rejected": -2.57313871383667, "step": 24980 }, { "epoch": 1.0, "learning_rate": 2.4369389622913575e-12, "logits/chosen": -3.056591510772705, "logits/rejected": -3.079812526702881, "logps/chosen": -6.634778022766113, "logps/rejected": -283.67340087890625, "loss": 0.1207, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.25196316838264465, "rewards/margins": 2.7314858436584473, "rewards/rejected": -2.479522228240967, "step": 24990 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -2.9939842224121094, "logits/rejected": -3.0244998931884766, "logps/chosen": -3.4739856719970703, "logps/rejected": -285.0193786621094, "loss": 0.0884, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2809720039367676, "rewards/margins": 2.7770934104919434, "rewards/rejected": -2.4961211681365967, "step": 25000 }, { "epoch": 1.0, "eval_logits/chosen": -3.071336030960083, "eval_logits/rejected": -3.097411870956421, "eval_logps/chosen": -0.15935298800468445, "eval_logps/rejected": -284.3338623046875, "eval_loss": 0.06034620478749275, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 0.3150060176849365, "eval_rewards/margins": 2.7981886863708496, "eval_rewards/rejected": -2.483182668685913, "eval_runtime": 2.5389, "eval_samples_per_second": 1.969, "eval_steps_per_second": 0.394, "step": 25000 }, { "epoch": 1.0, "step": 25000, "total_flos": 0.0, "train_loss": 0.10413682895421982, "train_runtime": 142559.6668, "train_samples_per_second": 1.403, "train_steps_per_second": 0.175 } ], "logging_steps": 10, "max_steps": 25000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }