{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 2942, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.694915254237288e-09, "logits/chosen": -1.5211243629455566, "logits/rejected": -0.9348576664924622, "logps/chosen": -412.05706787109375, "logps/rejected": -913.2714233398438, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.6949152542372882e-08, "logits/chosen": -1.4827719926834106, "logits/rejected": -1.226508378982544, "logps/chosen": -679.3842163085938, "logps/rejected": -639.005126953125, "loss": 0.8262, "rewards/accuracies": 0.4861111044883728, "rewards/chosen": 0.20207053422927856, "rewards/margins": 0.28480756282806396, "rewards/rejected": -0.0827370211482048, "step": 10 }, { "epoch": 0.01, "learning_rate": 3.3898305084745764e-08, "logits/chosen": -1.4881559610366821, "logits/rejected": -1.2070544958114624, "logps/chosen": -392.80548095703125, "logps/rejected": -549.167724609375, "loss": 0.8207, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.09954075515270233, "rewards/margins": -0.08116824924945831, "rewards/rejected": 0.18070900440216064, "step": 20 }, { "epoch": 0.01, "learning_rate": 5.0847457627118645e-08, "logits/chosen": -1.455928921699524, "logits/rejected": -1.218510389328003, "logps/chosen": -549.7676391601562, "logps/rejected": -525.0243530273438, "loss": 0.8307, "rewards/accuracies": 0.5625, "rewards/chosen": 0.05704854801297188, "rewards/margins": 0.16263702511787415, "rewards/rejected": -0.10558845847845078, "step": 30 }, { "epoch": 0.01, "learning_rate": 6.779661016949153e-08, "logits/chosen": -1.4766838550567627, "logits/rejected": -1.218590259552002, "logps/chosen": -411.13653564453125, "logps/rejected": -574.4963989257812, "loss": 0.7857, "rewards/accuracies": 0.375, "rewards/chosen": -0.10409893095493317, "rewards/margins": -0.21830201148986816, "rewards/rejected": 0.11420309543609619, "step": 40 }, { "epoch": 0.02, "learning_rate": 8.47457627118644e-08, "logits/chosen": -1.5140564441680908, "logits/rejected": -1.1615564823150635, "logps/chosen": -362.17059326171875, "logps/rejected": -673.89013671875, "loss": 0.8045, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.031153270974755287, "rewards/margins": 0.03609558939933777, "rewards/rejected": -0.00494231004267931, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.0169491525423729e-07, "logits/chosen": -1.498203992843628, "logits/rejected": -1.232889175415039, "logps/chosen": -459.11163330078125, "logps/rejected": -447.8902282714844, "loss": 0.7617, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.3170378804206848, "rewards/margins": 0.3453710079193115, "rewards/rejected": -0.028333133086562157, "step": 60 }, { "epoch": 0.02, "learning_rate": 1.1864406779661017e-07, "logits/chosen": -1.495025396347046, "logits/rejected": -1.215308427810669, "logps/chosen": -423.4064025878906, "logps/rejected": -605.0032958984375, "loss": 0.7105, "rewards/accuracies": 0.6875, "rewards/chosen": 0.3061259090900421, "rewards/margins": 0.37270691990852356, "rewards/rejected": -0.06658102571964264, "step": 70 }, { "epoch": 0.03, "learning_rate": 1.3559322033898305e-07, "logits/chosen": -1.475776195526123, "logits/rejected": -1.1816449165344238, "logps/chosen": -586.6575927734375, "logps/rejected": -481.2361755371094, "loss": 0.706, "rewards/accuracies": 0.625, "rewards/chosen": 0.3719860911369324, "rewards/margins": 0.38908010721206665, "rewards/rejected": -0.017094042152166367, "step": 80 }, { "epoch": 0.03, "learning_rate": 1.5254237288135593e-07, "logits/chosen": -1.5008628368377686, "logits/rejected": -1.2657488584518433, "logps/chosen": -372.3196105957031, "logps/rejected": -367.21673583984375, "loss": 0.6233, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.44139355421066284, "rewards/margins": 0.43834584951400757, "rewards/rejected": 0.0030477314721792936, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.694915254237288e-07, "logits/chosen": -1.5043809413909912, "logits/rejected": -1.163338303565979, "logps/chosen": -347.18408203125, "logps/rejected": -516.4083862304688, "loss": 0.618, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.5171124935150146, "rewards/margins": 0.7477121353149414, "rewards/rejected": -0.23059968650341034, "step": 100 }, { "epoch": 0.03, "eval_logits/chosen": -1.4878125190734863, "eval_logits/rejected": -1.1894134283065796, "eval_logps/chosen": -412.9344482421875, "eval_logps/rejected": -560.655029296875, "eval_loss": 0.5642263293266296, "eval_rewards/accuracies": 0.7424242496490479, "eval_rewards/chosen": 0.69883131980896, "eval_rewards/margins": 0.8126964569091797, "eval_rewards/rejected": -0.11386506259441376, "eval_runtime": 556.7476, "eval_samples_per_second": 17.063, "eval_steps_per_second": 0.533, "step": 100 }, { "epoch": 0.04, "learning_rate": 1.8644067796610168e-07, "logits/chosen": -1.4966198205947876, "logits/rejected": -1.1994943618774414, "logps/chosen": -360.8127746582031, "logps/rejected": -802.7747802734375, "loss": 0.5552, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.7751646637916565, "rewards/margins": 1.0216423273086548, "rewards/rejected": -0.24647776782512665, "step": 110 }, { "epoch": 0.04, "learning_rate": 2.0338983050847458e-07, "logits/chosen": -1.5263328552246094, "logits/rejected": -1.2719924449920654, "logps/chosen": -380.39715576171875, "logps/rejected": -544.8963012695312, "loss": 0.4977, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": 1.0511916875839233, "rewards/margins": 0.912127673625946, "rewards/rejected": 0.13906405866146088, "step": 120 }, { "epoch": 0.04, "learning_rate": 2.2033898305084743e-07, "logits/chosen": -1.4926766157150269, "logits/rejected": -1.205890417098999, "logps/chosen": -433.191650390625, "logps/rejected": -580.9930419921875, "loss": 0.4889, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 1.003864049911499, "rewards/margins": 1.033372402191162, "rewards/rejected": -0.029508382081985474, "step": 130 }, { "epoch": 0.05, "learning_rate": 2.3728813559322033e-07, "logits/chosen": -1.4977641105651855, "logits/rejected": -1.2685011625289917, "logps/chosen": -319.09954833984375, "logps/rejected": -613.55859375, "loss": 0.4889, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.8334357142448425, "rewards/margins": 0.8497712016105652, "rewards/rejected": -0.01633552275598049, "step": 140 }, { "epoch": 0.05, "learning_rate": 2.542372881355932e-07, "logits/chosen": -1.49599289894104, "logits/rejected": -1.2160688638687134, "logps/chosen": -361.3035583496094, "logps/rejected": -552.3671264648438, "loss": 0.4224, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 1.0968669652938843, "rewards/margins": 1.1340056657791138, "rewards/rejected": -0.037138573825359344, "step": 150 }, { "epoch": 0.05, "learning_rate": 2.711864406779661e-07, "logits/chosen": -1.4980968236923218, "logits/rejected": -1.2006438970565796, "logps/chosen": -340.8217468261719, "logps/rejected": -510.5523376464844, "loss": 0.466, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 1.201171875, "rewards/margins": 1.245226502418518, "rewards/rejected": -0.04405476525425911, "step": 160 }, { "epoch": 0.06, "learning_rate": 2.88135593220339e-07, "logits/chosen": -1.485167145729065, "logits/rejected": -1.1941057443618774, "logps/chosen": -447.4808654785156, "logps/rejected": -482.01336669921875, "loss": 0.3674, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 1.3460757732391357, "rewards/margins": 1.4081037044525146, "rewards/rejected": -0.06202799081802368, "step": 170 }, { "epoch": 0.06, "learning_rate": 3.0508474576271186e-07, "logits/chosen": -1.4714066982269287, "logits/rejected": -1.2212668657302856, "logps/chosen": -495.3789978027344, "logps/rejected": -627.9542236328125, "loss": 0.3855, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 1.6778628826141357, "rewards/margins": 1.837794303894043, "rewards/rejected": -0.15993157029151917, "step": 180 }, { "epoch": 0.06, "learning_rate": 3.220338983050847e-07, "logits/chosen": -1.4885241985321045, "logits/rejected": -1.1983642578125, "logps/chosen": -357.9476623535156, "logps/rejected": -569.2054443359375, "loss": 0.3162, "rewards/accuracies": 0.862500011920929, "rewards/chosen": 1.981848955154419, "rewards/margins": 2.1885552406311035, "rewards/rejected": -0.2067060023546219, "step": 190 }, { "epoch": 0.07, "learning_rate": 3.389830508474576e-07, "logits/chosen": -1.4891068935394287, "logits/rejected": -1.140967607498169, "logps/chosen": -420.4295349121094, "logps/rejected": -437.64874267578125, "loss": 0.3539, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 1.6021934747695923, "rewards/margins": 1.6449912786483765, "rewards/rejected": -0.04279797524213791, "step": 200 }, { "epoch": 0.07, "eval_logits/chosen": -1.4799621105194092, "eval_logits/rejected": -1.1625027656555176, "eval_logps/chosen": -400.7641296386719, "eval_logps/rejected": -562.246337890625, "eval_loss": 0.31968235969543457, "eval_rewards/accuracies": 0.8846801519393921, "eval_rewards/chosen": 1.915861964225769, "eval_rewards/margins": 2.1888532638549805, "eval_rewards/rejected": -0.27299147844314575, "eval_runtime": 558.7533, "eval_samples_per_second": 17.002, "eval_steps_per_second": 0.532, "step": 200 }, { "epoch": 0.07, "learning_rate": 3.559322033898305e-07, "logits/chosen": -1.4898474216461182, "logits/rejected": -1.2634966373443604, "logps/chosen": -344.88134765625, "logps/rejected": -730.7076416015625, "loss": 0.3019, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 2.2538204193115234, "rewards/margins": 2.555640697479248, "rewards/rejected": -0.3018200993537903, "step": 210 }, { "epoch": 0.07, "learning_rate": 3.7288135593220336e-07, "logits/chosen": -1.4739089012145996, "logits/rejected": -1.2359154224395752, "logps/chosen": -474.7027282714844, "logps/rejected": -482.19598388671875, "loss": 0.2677, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 2.262795925140381, "rewards/margins": 2.4807448387145996, "rewards/rejected": -0.21794895827770233, "step": 220 }, { "epoch": 0.08, "learning_rate": 3.898305084745763e-07, "logits/chosen": -1.4778010845184326, "logits/rejected": -1.2181063890457153, "logps/chosen": -419.628662109375, "logps/rejected": -598.6785278320312, "loss": 0.267, "rewards/accuracies": 0.875, "rewards/chosen": 2.3457484245300293, "rewards/margins": 2.492673397064209, "rewards/rejected": -0.1469249576330185, "step": 230 }, { "epoch": 0.08, "learning_rate": 4.0677966101694916e-07, "logits/chosen": -1.4769701957702637, "logits/rejected": -1.1472581624984741, "logps/chosen": -401.890625, "logps/rejected": -701.8416748046875, "loss": 0.3046, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 2.2063190937042236, "rewards/margins": 2.501216173171997, "rewards/rejected": -0.2948969304561615, "step": 240 }, { "epoch": 0.08, "learning_rate": 4.23728813559322e-07, "logits/chosen": -1.4752933979034424, "logits/rejected": -1.1277306079864502, "logps/chosen": -344.5944519042969, "logps/rejected": -534.5394287109375, "loss": 0.2745, "rewards/accuracies": 0.9375, "rewards/chosen": 2.5138680934906006, "rewards/margins": 3.0067882537841797, "rewards/rejected": -0.4929198622703552, "step": 250 }, { "epoch": 0.09, "learning_rate": 4.4067796610169486e-07, "logits/chosen": -1.4643208980560303, "logits/rejected": -1.2705574035644531, "logps/chosen": -415.1036071777344, "logps/rejected": -591.1699829101562, "loss": 0.2602, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 2.2184274196624756, "rewards/margins": 2.6140334606170654, "rewards/rejected": -0.395606130361557, "step": 260 }, { "epoch": 0.09, "learning_rate": 4.576271186440678e-07, "logits/chosen": -1.4880424737930298, "logits/rejected": -1.1456931829452515, "logps/chosen": -367.011962890625, "logps/rejected": -408.5341491699219, "loss": 0.2313, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 2.4607489109039307, "rewards/margins": 2.9449193477630615, "rewards/rejected": -0.48417049646377563, "step": 270 }, { "epoch": 0.1, "learning_rate": 4.7457627118644066e-07, "logits/chosen": -1.4503757953643799, "logits/rejected": -1.0820204019546509, "logps/chosen": -341.2666320800781, "logps/rejected": -390.6230163574219, "loss": 0.2483, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 2.5250704288482666, "rewards/margins": 3.119588851928711, "rewards/rejected": -0.59451824426651, "step": 280 }, { "epoch": 0.1, "learning_rate": 4.915254237288136e-07, "logits/chosen": -1.4802117347717285, "logits/rejected": -1.1157623529434204, "logps/chosen": -311.9570007324219, "logps/rejected": -566.5151977539062, "loss": 0.2405, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 2.3611626625061035, "rewards/margins": 3.0045018196105957, "rewards/rejected": -0.6433390378952026, "step": 290 }, { "epoch": 0.1, "learning_rate": 4.990555345674349e-07, "logits/chosen": -1.483705997467041, "logits/rejected": -1.069526195526123, "logps/chosen": -341.3657531738281, "logps/rejected": -632.6183471679688, "loss": 0.2287, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 2.7662487030029297, "rewards/margins": 3.5426669120788574, "rewards/rejected": -0.7764180302619934, "step": 300 }, { "epoch": 0.1, "eval_logits/chosen": -1.464929223060608, "eval_logits/rejected": -1.1360561847686768, "eval_logps/chosen": -391.8653564453125, "eval_logps/rejected": -565.0551147460938, "eval_loss": 0.2127748280763626, "eval_rewards/accuracies": 0.9200336933135986, "eval_rewards/chosen": 2.805741310119629, "eval_rewards/margins": 3.3596181869506836, "eval_rewards/rejected": -0.5538769960403442, "eval_runtime": 557.7091, "eval_samples_per_second": 17.034, "eval_steps_per_second": 0.533, "step": 300 }, { "epoch": 0.11, "learning_rate": 4.971666037023044e-07, "logits/chosen": -1.478566288948059, "logits/rejected": -1.18798828125, "logps/chosen": -352.68511962890625, "logps/rejected": -547.9373168945312, "loss": 0.2133, "rewards/accuracies": 0.9375, "rewards/chosen": 2.948876142501831, "rewards/margins": 3.459970474243164, "rewards/rejected": -0.5110937356948853, "step": 310 }, { "epoch": 0.11, "learning_rate": 4.952776728371742e-07, "logits/chosen": -1.441450834274292, "logits/rejected": -1.1746580600738525, "logps/chosen": -548.0260009765625, "logps/rejected": -451.2164611816406, "loss": 0.2126, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 3.0513620376586914, "rewards/margins": 3.686058759689331, "rewards/rejected": -0.6346968412399292, "step": 320 }, { "epoch": 0.11, "learning_rate": 4.933887419720438e-07, "logits/chosen": -1.4612153768539429, "logits/rejected": -1.1157002449035645, "logps/chosen": -409.2095642089844, "logps/rejected": -628.3384399414062, "loss": 0.2312, "rewards/accuracies": 0.9375, "rewards/chosen": 3.231168031692505, "rewards/margins": 4.0572967529296875, "rewards/rejected": -0.826129138469696, "step": 330 }, { "epoch": 0.12, "learning_rate": 4.914998111069135e-07, "logits/chosen": -1.471062183380127, "logits/rejected": -1.1619117259979248, "logps/chosen": -319.6500549316406, "logps/rejected": -560.5769653320312, "loss": 0.1976, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.2418594360351562, "rewards/margins": 3.9750003814697266, "rewards/rejected": -0.733141303062439, "step": 340 }, { "epoch": 0.12, "learning_rate": 4.896108802417831e-07, "logits/chosen": -1.4680635929107666, "logits/rejected": -1.2122979164123535, "logps/chosen": -383.1250305175781, "logps/rejected": -619.9765625, "loss": 0.2053, "rewards/accuracies": 0.9375, "rewards/chosen": 3.1170341968536377, "rewards/margins": 3.632521152496338, "rewards/rejected": -0.5154868364334106, "step": 350 }, { "epoch": 0.12, "learning_rate": 4.877219493766528e-07, "logits/chosen": -1.480360746383667, "logits/rejected": -1.1829755306243896, "logps/chosen": -315.16925048828125, "logps/rejected": -440.88409423828125, "loss": 0.1615, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 3.1895642280578613, "rewards/margins": 4.29224967956543, "rewards/rejected": -1.1026861667633057, "step": 360 }, { "epoch": 0.13, "learning_rate": 4.858330185115224e-07, "logits/chosen": -1.4765106439590454, "logits/rejected": -1.180673360824585, "logps/chosen": -322.9911193847656, "logps/rejected": -827.7867431640625, "loss": 0.1879, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 3.0941760540008545, "rewards/margins": 3.847609043121338, "rewards/rejected": -0.7534326910972595, "step": 370 }, { "epoch": 0.13, "learning_rate": 4.839440876463921e-07, "logits/chosen": -1.4910002946853638, "logits/rejected": -1.1475781202316284, "logps/chosen": -359.06103515625, "logps/rejected": -703.1707153320312, "loss": 0.1842, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 3.6445469856262207, "rewards/margins": 4.668353080749512, "rewards/rejected": -1.023805856704712, "step": 380 }, { "epoch": 0.13, "learning_rate": 4.820551567812618e-07, "logits/chosen": -1.4512460231781006, "logits/rejected": -1.1814398765563965, "logps/chosen": -393.1922302246094, "logps/rejected": -443.9873962402344, "loss": 0.1744, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 3.31535267829895, "rewards/margins": 3.9874045848846436, "rewards/rejected": -0.6720519065856934, "step": 390 }, { "epoch": 0.14, "learning_rate": 4.801662259161314e-07, "logits/chosen": -1.4928423166275024, "logits/rejected": -1.0846529006958008, "logps/chosen": -340.31768798828125, "logps/rejected": -675.9088745117188, "loss": 0.158, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.019392967224121, "rewards/margins": 5.392711639404297, "rewards/rejected": -1.3733187913894653, "step": 400 }, { "epoch": 0.14, "eval_logits/chosen": -1.462174892425537, "eval_logits/rejected": -1.1299500465393066, "eval_logps/chosen": -385.3669738769531, "eval_logps/rejected": -569.8557739257812, "eval_loss": 0.1673159897327423, "eval_rewards/accuracies": 0.932659924030304, "eval_rewards/chosen": 3.4555790424346924, "eval_rewards/margins": 4.489521503448486, "eval_rewards/rejected": -1.0339421033859253, "eval_runtime": 557.8843, "eval_samples_per_second": 17.029, "eval_steps_per_second": 0.532, "step": 400 }, { "epoch": 0.14, "learning_rate": 4.782772950510011e-07, "logits/chosen": -1.4920897483825684, "logits/rejected": -1.212968111038208, "logps/chosen": -316.6252746582031, "logps/rejected": -558.7969360351562, "loss": 0.165, "rewards/accuracies": 0.9375, "rewards/chosen": 3.692905902862549, "rewards/margins": 4.354551315307617, "rewards/rejected": -0.6616458296775818, "step": 410 }, { "epoch": 0.14, "learning_rate": 4.7638836418587073e-07, "logits/chosen": -1.468379259109497, "logits/rejected": -1.1990084648132324, "logps/chosen": -325.2828369140625, "logps/rejected": -746.1368408203125, "loss": 0.1803, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.6396260261535645, "rewards/margins": 4.8296284675598145, "rewards/rejected": -1.19000244140625, "step": 420 }, { "epoch": 0.15, "learning_rate": 4.7449943332074044e-07, "logits/chosen": -1.4748234748840332, "logits/rejected": -1.181004285812378, "logps/chosen": -308.9472961425781, "logps/rejected": -665.6039428710938, "loss": 0.1592, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 3.706333875656128, "rewards/margins": 4.874017715454102, "rewards/rejected": -1.1676843166351318, "step": 430 }, { "epoch": 0.15, "learning_rate": 4.7261050245561014e-07, "logits/chosen": -1.4771819114685059, "logits/rejected": -1.1283105611801147, "logps/chosen": -328.56915283203125, "logps/rejected": -495.9109802246094, "loss": 0.1475, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 3.4302115440368652, "rewards/margins": 4.693282127380371, "rewards/rejected": -1.2630702257156372, "step": 440 }, { "epoch": 0.15, "learning_rate": 4.7072157159047975e-07, "logits/chosen": -1.4672292470932007, "logits/rejected": -1.0770254135131836, "logps/chosen": -366.31182861328125, "logps/rejected": -418.83758544921875, "loss": 0.1714, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.2115979194641113, "rewards/margins": 4.826067924499512, "rewards/rejected": -1.6144702434539795, "step": 450 }, { "epoch": 0.16, "learning_rate": 4.6883264072534946e-07, "logits/chosen": -1.4420884847640991, "logits/rejected": -1.096064567565918, "logps/chosen": -405.8441467285156, "logps/rejected": -466.4434509277344, "loss": 0.1394, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.97516131401062, "rewards/margins": 5.2190327644348145, "rewards/rejected": -1.2438714504241943, "step": 460 }, { "epoch": 0.16, "learning_rate": 4.6694370986021906e-07, "logits/chosen": -1.4477102756500244, "logits/rejected": -1.1750242710113525, "logps/chosen": -496.8306579589844, "logps/rejected": -317.3291015625, "loss": 0.1444, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.208278656005859, "rewards/margins": 5.423037052154541, "rewards/rejected": -1.2147585153579712, "step": 470 }, { "epoch": 0.16, "learning_rate": 4.6505477899508877e-07, "logits/chosen": -1.4701149463653564, "logits/rejected": -1.249976396560669, "logps/chosen": -317.600830078125, "logps/rejected": -649.2884521484375, "loss": 0.1347, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 3.9683494567871094, "rewards/margins": 5.167794227600098, "rewards/rejected": -1.199444055557251, "step": 480 }, { "epoch": 0.17, "learning_rate": 4.631658481299584e-07, "logits/chosen": -1.4915847778320312, "logits/rejected": -1.1582549810409546, "logps/chosen": -343.9900207519531, "logps/rejected": -545.9863891601562, "loss": 0.1168, "rewards/accuracies": 1.0, "rewards/chosen": 4.775851726531982, "rewards/margins": 6.124849796295166, "rewards/rejected": -1.3489978313446045, "step": 490 }, { "epoch": 0.17, "learning_rate": 4.612769172648281e-07, "logits/chosen": -1.4516441822052002, "logits/rejected": -1.1763832569122314, "logps/chosen": -479.01776123046875, "logps/rejected": -355.7424011230469, "loss": 0.1599, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.103041172027588, "rewards/margins": 5.349932670593262, "rewards/rejected": -1.2468923330307007, "step": 500 }, { "epoch": 0.17, "eval_logits/chosen": -1.4607428312301636, "eval_logits/rejected": -1.1274610757827759, "eval_logps/chosen": -382.4375915527344, "eval_logps/rejected": -572.8546142578125, "eval_loss": 0.13974203169345856, "eval_rewards/accuracies": 0.9461279511451721, "eval_rewards/chosen": 3.7485170364379883, "eval_rewards/margins": 5.082335472106934, "eval_rewards/rejected": -1.3338183164596558, "eval_runtime": 557.5578, "eval_samples_per_second": 17.039, "eval_steps_per_second": 0.533, "step": 500 }, { "epoch": 0.17, "learning_rate": 4.5938798639969773e-07, "logits/chosen": -1.4614421129226685, "logits/rejected": -1.1661673784255981, "logps/chosen": -432.869384765625, "logps/rejected": -702.9627685546875, "loss": 0.1517, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 3.9662163257598877, "rewards/margins": 5.194244384765625, "rewards/rejected": -1.228027105331421, "step": 510 }, { "epoch": 0.18, "learning_rate": 4.574990555345674e-07, "logits/chosen": -1.4722058773040771, "logits/rejected": -1.1686432361602783, "logps/chosen": -335.47344970703125, "logps/rejected": -595.4827880859375, "loss": 0.1268, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 3.8043179512023926, "rewards/margins": 5.093755722045898, "rewards/rejected": -1.2894370555877686, "step": 520 }, { "epoch": 0.18, "learning_rate": 4.556101246694371e-07, "logits/chosen": -1.4674203395843506, "logits/rejected": -1.1525086164474487, "logps/chosen": -329.02264404296875, "logps/rejected": -652.1644287109375, "loss": 0.1353, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.099880218505859, "rewards/margins": 5.421080589294434, "rewards/rejected": -1.3212003707885742, "step": 530 }, { "epoch": 0.18, "learning_rate": 4.5372119380430675e-07, "logits/chosen": -1.4733096361160278, "logits/rejected": -1.134479284286499, "logps/chosen": -315.3797912597656, "logps/rejected": -457.8125915527344, "loss": 0.1457, "rewards/accuracies": 1.0, "rewards/chosen": 3.9675514698028564, "rewards/margins": 5.392933368682861, "rewards/rejected": -1.4253814220428467, "step": 540 }, { "epoch": 0.19, "learning_rate": 4.518322629391764e-07, "logits/chosen": -1.465785264968872, "logits/rejected": -1.1767680644989014, "logps/chosen": -327.9288635253906, "logps/rejected": -518.0633544921875, "loss": 0.1361, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 3.936115264892578, "rewards/margins": 5.388223171234131, "rewards/rejected": -1.45210862159729, "step": 550 }, { "epoch": 0.19, "learning_rate": 4.4994333207404607e-07, "logits/chosen": -1.4351527690887451, "logits/rejected": -1.1543748378753662, "logps/chosen": -454.43292236328125, "logps/rejected": -475.8153381347656, "loss": 0.1295, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.992440700531006, "rewards/margins": 5.424699306488037, "rewards/rejected": -1.4322583675384521, "step": 560 }, { "epoch": 0.19, "learning_rate": 4.480544012089157e-07, "logits/chosen": -1.4429913759231567, "logits/rejected": -1.201302409172058, "logps/chosen": -496.41790771484375, "logps/rejected": -365.816650390625, "loss": 0.1039, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.107169151306152, "rewards/margins": 5.378964424133301, "rewards/rejected": -1.2717949151992798, "step": 570 }, { "epoch": 0.2, "learning_rate": 4.461654703437854e-07, "logits/chosen": -1.4437055587768555, "logits/rejected": -1.1555430889129639, "logps/chosen": -460.52252197265625, "logps/rejected": -543.8465576171875, "loss": 0.1497, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.182587623596191, "rewards/margins": 5.458142280578613, "rewards/rejected": -1.2755542993545532, "step": 580 }, { "epoch": 0.2, "learning_rate": 4.442765394786551e-07, "logits/chosen": -1.4703487157821655, "logits/rejected": -1.1514756679534912, "logps/chosen": -335.76141357421875, "logps/rejected": -425.265380859375, "loss": 0.1179, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.938772678375244, "rewards/margins": 5.324645042419434, "rewards/rejected": -1.3858733177185059, "step": 590 }, { "epoch": 0.2, "learning_rate": 4.423876086135247e-07, "logits/chosen": -1.4418364763259888, "logits/rejected": -1.1505249738693237, "logps/chosen": -448.2904357910156, "logps/rejected": -721.1622314453125, "loss": 0.1389, "rewards/accuracies": 0.9375, "rewards/chosen": 3.9244837760925293, "rewards/margins": 5.320973873138428, "rewards/rejected": -1.3964899778366089, "step": 600 }, { "epoch": 0.2, "eval_logits/chosen": -1.4519003629684448, "eval_logits/rejected": -1.1194298267364502, "eval_logps/chosen": -380.6632995605469, "eval_logps/rejected": -574.6277465820312, "eval_loss": 0.12727472186088562, "eval_rewards/accuracies": 0.9528619647026062, "eval_rewards/chosen": 3.9259443283081055, "eval_rewards/margins": 5.437079906463623, "eval_rewards/rejected": -1.5111361742019653, "eval_runtime": 557.5407, "eval_samples_per_second": 17.039, "eval_steps_per_second": 0.533, "step": 600 }, { "epoch": 0.21, "learning_rate": 4.404986777483944e-07, "logits/chosen": -1.462982416152954, "logits/rejected": -1.1688556671142578, "logps/chosen": -368.68914794921875, "logps/rejected": -497.86566162109375, "loss": 0.1364, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.3148436546325684, "rewards/margins": 4.890419006347656, "rewards/rejected": -1.5755746364593506, "step": 610 }, { "epoch": 0.21, "learning_rate": 4.3860974688326405e-07, "logits/chosen": -1.4497849941253662, "logits/rejected": -1.1572027206420898, "logps/chosen": -465.70391845703125, "logps/rejected": -631.2728881835938, "loss": 0.1163, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.193670272827148, "rewards/margins": 5.658702850341797, "rewards/rejected": -1.465032935142517, "step": 620 }, { "epoch": 0.21, "learning_rate": 4.367208160181337e-07, "logits/chosen": -1.4319039583206177, "logits/rejected": -1.1632667779922485, "logps/chosen": -421.3758239746094, "logps/rejected": -333.9304504394531, "loss": 0.1224, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 3.695667266845703, "rewards/margins": 5.386081218719482, "rewards/rejected": -1.6904138326644897, "step": 630 }, { "epoch": 0.22, "learning_rate": 4.348318851530034e-07, "logits/chosen": -1.4637318849563599, "logits/rejected": -1.095100998878479, "logps/chosen": -396.5002136230469, "logps/rejected": -622.4762573242188, "loss": 0.09, "rewards/accuracies": 1.0, "rewards/chosen": 4.092984199523926, "rewards/margins": 6.124913215637207, "rewards/rejected": -2.0319290161132812, "step": 640 }, { "epoch": 0.22, "learning_rate": 4.32942954287873e-07, "logits/chosen": -1.4509179592132568, "logits/rejected": -1.1328189373016357, "logps/chosen": -371.68572998046875, "logps/rejected": -406.5970458984375, "loss": 0.1163, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 4.031527996063232, "rewards/margins": 5.5390424728393555, "rewards/rejected": -1.5075138807296753, "step": 650 }, { "epoch": 0.22, "learning_rate": 4.3105402342274273e-07, "logits/chosen": -1.4884783029556274, "logits/rejected": -1.1401994228363037, "logps/chosen": -321.93023681640625, "logps/rejected": -570.5530395507812, "loss": 0.1097, "rewards/accuracies": 0.9375, "rewards/chosen": 3.893491744995117, "rewards/margins": 5.886297702789307, "rewards/rejected": -1.9928067922592163, "step": 660 }, { "epoch": 0.23, "learning_rate": 4.2916509255761233e-07, "logits/chosen": -1.45878005027771, "logits/rejected": -1.0974493026733398, "logps/chosen": -378.56256103515625, "logps/rejected": -671.4094848632812, "loss": 0.1419, "rewards/accuracies": 0.9375, "rewards/chosen": 3.538317918777466, "rewards/margins": 5.423024654388428, "rewards/rejected": -1.8847074508666992, "step": 670 }, { "epoch": 0.23, "learning_rate": 4.2727616169248204e-07, "logits/chosen": -1.4535772800445557, "logits/rejected": -1.0777978897094727, "logps/chosen": -347.06304931640625, "logps/rejected": -607.7723999023438, "loss": 0.1015, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.180381774902344, "rewards/margins": 6.305299282073975, "rewards/rejected": -2.12491774559021, "step": 680 }, { "epoch": 0.23, "learning_rate": 4.253872308273517e-07, "logits/chosen": -1.4577261209487915, "logits/rejected": -1.1172749996185303, "logps/chosen": -428.004150390625, "logps/rejected": -609.6082763671875, "loss": 0.0902, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.4771833419799805, "rewards/margins": 6.247294902801514, "rewards/rejected": -1.770111083984375, "step": 690 }, { "epoch": 0.24, "learning_rate": 4.2349829996222135e-07, "logits/chosen": -1.459695816040039, "logits/rejected": -1.1408016681671143, "logps/chosen": -375.980224609375, "logps/rejected": -637.662109375, "loss": 0.0778, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.746489524841309, "rewards/margins": 6.767748832702637, "rewards/rejected": -2.0212595462799072, "step": 700 }, { "epoch": 0.24, "eval_logits/chosen": -1.4541884660720825, "eval_logits/rejected": -1.1302434206008911, "eval_logps/chosen": -379.2232971191406, "eval_logps/rejected": -578.0139770507812, "eval_loss": 0.1122458353638649, "eval_rewards/accuracies": 0.9612794518470764, "eval_rewards/chosen": 4.069947719573975, "eval_rewards/margins": 5.919719219207764, "eval_rewards/rejected": -1.84977126121521, "eval_runtime": 559.0869, "eval_samples_per_second": 16.992, "eval_steps_per_second": 0.531, "step": 700 }, { "epoch": 0.24, "learning_rate": 4.2160936909709106e-07, "logits/chosen": -1.4686052799224854, "logits/rejected": -1.17806077003479, "logps/chosen": -370.8101806640625, "logps/rejected": -445.7115783691406, "loss": 0.1139, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.275851249694824, "rewards/margins": 5.940474033355713, "rewards/rejected": -1.6646230220794678, "step": 710 }, { "epoch": 0.24, "learning_rate": 4.1972043823196066e-07, "logits/chosen": -1.4710712432861328, "logits/rejected": -1.2118116617202759, "logps/chosen": -349.842041015625, "logps/rejected": -497.0186462402344, "loss": 0.1405, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.897695541381836, "rewards/margins": 6.115738391876221, "rewards/rejected": -2.2180426120758057, "step": 720 }, { "epoch": 0.25, "learning_rate": 4.1783150736683037e-07, "logits/chosen": -1.4504650831222534, "logits/rejected": -1.1481643915176392, "logps/chosen": -421.00927734375, "logps/rejected": -541.6673583984375, "loss": 0.1052, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.073805809020996, "rewards/margins": 5.906230926513672, "rewards/rejected": -1.8324254751205444, "step": 730 }, { "epoch": 0.25, "learning_rate": 4.1594257650170003e-07, "logits/chosen": -1.475731611251831, "logits/rejected": -1.1985948085784912, "logps/chosen": -405.5483703613281, "logps/rejected": -488.00091552734375, "loss": 0.1015, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 3.7700603008270264, "rewards/margins": 5.521186351776123, "rewards/rejected": -1.7511262893676758, "step": 740 }, { "epoch": 0.25, "learning_rate": 4.140536456365697e-07, "logits/chosen": -1.4649735689163208, "logits/rejected": -1.115206241607666, "logps/chosen": -314.864501953125, "logps/rejected": -583.0494995117188, "loss": 0.0764, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.480077266693115, "rewards/margins": 6.317180633544922, "rewards/rejected": -1.837104082107544, "step": 750 }, { "epoch": 0.26, "learning_rate": 4.1216471477143934e-07, "logits/chosen": -1.4586890935897827, "logits/rejected": -1.1937768459320068, "logps/chosen": -318.05615234375, "logps/rejected": -491.633056640625, "loss": 0.1225, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 3.9885802268981934, "rewards/margins": 5.613485813140869, "rewards/rejected": -1.6249048709869385, "step": 760 }, { "epoch": 0.26, "learning_rate": 4.10275783906309e-07, "logits/chosen": -1.474686861038208, "logits/rejected": -1.2240248918533325, "logps/chosen": -448.8330993652344, "logps/rejected": -574.8685302734375, "loss": 0.079, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.1468000411987305, "rewards/margins": 6.423197269439697, "rewards/rejected": -2.276397228240967, "step": 770 }, { "epoch": 0.27, "learning_rate": 4.0838685304117865e-07, "logits/chosen": -1.4716846942901611, "logits/rejected": -1.1474727392196655, "logps/chosen": -458.2037658691406, "logps/rejected": -677.0916137695312, "loss": 0.103, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.230536460876465, "rewards/margins": 6.255263805389404, "rewards/rejected": -2.0247273445129395, "step": 780 }, { "epoch": 0.27, "learning_rate": 4.0649792217604836e-07, "logits/chosen": -1.4893128871917725, "logits/rejected": -1.1543127298355103, "logps/chosen": -368.73876953125, "logps/rejected": -385.9571533203125, "loss": 0.0759, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.367506504058838, "rewards/margins": 6.192745208740234, "rewards/rejected": -1.825238823890686, "step": 790 }, { "epoch": 0.27, "learning_rate": 4.04608991310918e-07, "logits/chosen": -1.4659887552261353, "logits/rejected": -1.2101812362670898, "logps/chosen": -387.55120849609375, "logps/rejected": -527.9453735351562, "loss": 0.0993, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.042902946472168, "rewards/margins": 5.99516487121582, "rewards/rejected": -1.9522621631622314, "step": 800 }, { "epoch": 0.27, "eval_logits/chosen": -1.46894109249115, "eval_logits/rejected": -1.142426609992981, "eval_logps/chosen": -377.5001220703125, "eval_logps/rejected": -579.4506225585938, "eval_loss": 0.09749113768339157, "eval_rewards/accuracies": 0.9663299918174744, "eval_rewards/chosen": 4.2422590255737305, "eval_rewards/margins": 6.235683441162109, "eval_rewards/rejected": -1.9934238195419312, "eval_runtime": 558.57, "eval_samples_per_second": 17.008, "eval_steps_per_second": 0.532, "step": 800 }, { "epoch": 0.28, "learning_rate": 4.0272006044578767e-07, "logits/chosen": -1.4774185419082642, "logits/rejected": -1.185450553894043, "logps/chosen": -389.61981201171875, "logps/rejected": -652.4323120117188, "loss": 0.1036, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.061360836029053, "rewards/margins": 6.323044776916504, "rewards/rejected": -2.261683940887451, "step": 810 }, { "epoch": 0.28, "learning_rate": 4.0083112958065733e-07, "logits/chosen": -1.4516403675079346, "logits/rejected": -1.1874592304229736, "logps/chosen": -475.3050231933594, "logps/rejected": -444.3898010253906, "loss": 0.1065, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 3.8749263286590576, "rewards/margins": 5.786238670349121, "rewards/rejected": -1.9113123416900635, "step": 820 }, { "epoch": 0.28, "learning_rate": 3.98942198715527e-07, "logits/chosen": -1.4862116575241089, "logits/rejected": -1.2050374746322632, "logps/chosen": -303.4962463378906, "logps/rejected": -611.756591796875, "loss": 0.1023, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.315167427062988, "rewards/margins": 6.290981769561768, "rewards/rejected": -1.9758144617080688, "step": 830 }, { "epoch": 0.29, "learning_rate": 3.970532678503967e-07, "logits/chosen": -1.4739606380462646, "logits/rejected": -1.2146607637405396, "logps/chosen": -395.7440490722656, "logps/rejected": -519.9666748046875, "loss": 0.0939, "rewards/accuracies": 1.0, "rewards/chosen": 5.144864559173584, "rewards/margins": 7.278559684753418, "rewards/rejected": -2.133694648742676, "step": 840 }, { "epoch": 0.29, "learning_rate": 3.951643369852663e-07, "logits/chosen": -1.476678490638733, "logits/rejected": -1.206061601638794, "logps/chosen": -404.0805358886719, "logps/rejected": -790.9165649414062, "loss": 0.0917, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.473787307739258, "rewards/margins": 6.4759931564331055, "rewards/rejected": -2.002206325531006, "step": 850 }, { "epoch": 0.29, "learning_rate": 3.93275406120136e-07, "logits/chosen": -1.471995234489441, "logits/rejected": -1.2011922597885132, "logps/chosen": -373.6271057128906, "logps/rejected": -589.4290771484375, "loss": 0.0896, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.3988142013549805, "rewards/margins": 6.434880256652832, "rewards/rejected": -2.036065101623535, "step": 860 }, { "epoch": 0.3, "learning_rate": 3.913864752550056e-07, "logits/chosen": -1.4729435443878174, "logits/rejected": -1.2467955350875854, "logps/chosen": -479.82470703125, "logps/rejected": -649.1353759765625, "loss": 0.1121, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 5.313334941864014, "rewards/margins": 7.132607936859131, "rewards/rejected": -1.8192729949951172, "step": 870 }, { "epoch": 0.3, "learning_rate": 3.894975443898753e-07, "logits/chosen": -1.4922538995742798, "logits/rejected": -1.1949760913848877, "logps/chosen": -295.2701721191406, "logps/rejected": -510.95001220703125, "loss": 0.0773, "rewards/accuracies": 1.0, "rewards/chosen": 4.697510719299316, "rewards/margins": 6.993855953216553, "rewards/rejected": -2.2963459491729736, "step": 880 }, { "epoch": 0.3, "learning_rate": 3.87608613524745e-07, "logits/chosen": -1.4829437732696533, "logits/rejected": -1.2089799642562866, "logps/chosen": -367.78387451171875, "logps/rejected": -401.001953125, "loss": 0.1007, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.9919090270996094, "rewards/margins": 5.9659037590026855, "rewards/rejected": -1.973995566368103, "step": 890 }, { "epoch": 0.31, "learning_rate": 3.857196826596146e-07, "logits/chosen": -1.5004401206970215, "logits/rejected": -1.0854889154434204, "logps/chosen": -337.4493408203125, "logps/rejected": -477.30963134765625, "loss": 0.111, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 4.4730706214904785, "rewards/margins": 6.716238498687744, "rewards/rejected": -2.2431674003601074, "step": 900 }, { "epoch": 0.31, "eval_logits/chosen": -1.4820351600646973, "eval_logits/rejected": -1.1542390584945679, "eval_logps/chosen": -376.70477294921875, "eval_logps/rejected": -582.0501098632812, "eval_loss": 0.09071440994739532, "eval_rewards/accuracies": 0.9696969985961914, "eval_rewards/chosen": 4.32179594039917, "eval_rewards/margins": 6.575175762176514, "eval_rewards/rejected": -2.253380537033081, "eval_runtime": 559.8508, "eval_samples_per_second": 16.969, "eval_steps_per_second": 0.53, "step": 900 }, { "epoch": 0.31, "learning_rate": 3.8383075179448433e-07, "logits/chosen": -1.48300302028656, "logits/rejected": -1.1969270706176758, "logps/chosen": -444.98114013671875, "logps/rejected": -399.57403564453125, "loss": 0.0892, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.185998439788818, "rewards/margins": 6.078363418579102, "rewards/rejected": -1.8923648595809937, "step": 910 }, { "epoch": 0.31, "learning_rate": 3.8194182092935394e-07, "logits/chosen": -1.4898041486740112, "logits/rejected": -1.1662390232086182, "logps/chosen": -322.0852355957031, "logps/rejected": -505.6220703125, "loss": 0.0793, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.525553226470947, "rewards/margins": 6.857700347900391, "rewards/rejected": -2.3321471214294434, "step": 920 }, { "epoch": 0.32, "learning_rate": 3.8005289006422365e-07, "logits/chosen": -1.4851583242416382, "logits/rejected": -1.1979453563690186, "logps/chosen": -358.98101806640625, "logps/rejected": -621.0003051757812, "loss": 0.0882, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.939785957336426, "rewards/margins": 7.3684186935424805, "rewards/rejected": -2.4286324977874756, "step": 930 }, { "epoch": 0.32, "learning_rate": 3.7816395919909325e-07, "logits/chosen": -1.49127197265625, "logits/rejected": -1.239793062210083, "logps/chosen": -314.9703674316406, "logps/rejected": -568.3617553710938, "loss": 0.0664, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.589601993560791, "rewards/margins": 6.650811672210693, "rewards/rejected": -2.0612106323242188, "step": 940 }, { "epoch": 0.32, "learning_rate": 3.7627502833396296e-07, "logits/chosen": -1.4954484701156616, "logits/rejected": -1.247184157371521, "logps/chosen": -381.68499755859375, "logps/rejected": -518.2406005859375, "loss": 0.0898, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.539010047912598, "rewards/margins": 6.654293060302734, "rewards/rejected": -2.115283489227295, "step": 950 }, { "epoch": 0.33, "learning_rate": 3.7438609746883267e-07, "logits/chosen": -1.475711464881897, "logits/rejected": -1.2076390981674194, "logps/chosen": -448.14556884765625, "logps/rejected": -554.5950927734375, "loss": 0.0742, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.037951946258545, "rewards/margins": 7.1611647605896, "rewards/rejected": -2.1232128143310547, "step": 960 }, { "epoch": 0.33, "learning_rate": 3.7249716660370227e-07, "logits/chosen": -1.4880720376968384, "logits/rejected": -1.206027865409851, "logps/chosen": -303.1774597167969, "logps/rejected": -724.1024169921875, "loss": 0.0858, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.292189121246338, "rewards/margins": 6.413214683532715, "rewards/rejected": -2.1210262775421143, "step": 970 }, { "epoch": 0.33, "learning_rate": 3.70608235738572e-07, "logits/chosen": -1.4684410095214844, "logits/rejected": -1.15514075756073, "logps/chosen": -447.8116760253906, "logps/rejected": -666.262939453125, "loss": 0.1078, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.545645236968994, "rewards/margins": 7.141517639160156, "rewards/rejected": -2.595871925354004, "step": 980 }, { "epoch": 0.34, "learning_rate": 3.687193048734416e-07, "logits/chosen": -1.4797093868255615, "logits/rejected": -1.1443145275115967, "logps/chosen": -302.0823059082031, "logps/rejected": -653.0443115234375, "loss": 0.0951, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.940204620361328, "rewards/margins": 7.737614631652832, "rewards/rejected": -2.797410011291504, "step": 990 }, { "epoch": 0.34, "learning_rate": 3.668303740083113e-07, "logits/chosen": -1.483666181564331, "logits/rejected": -1.183774709701538, "logps/chosen": -401.68359375, "logps/rejected": -488.19451904296875, "loss": 0.0893, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.864621639251709, "rewards/margins": 7.051810264587402, "rewards/rejected": -2.187187671661377, "step": 1000 }, { "epoch": 0.34, "eval_logits/chosen": -1.469427227973938, "eval_logits/rejected": -1.14968740940094, "eval_logps/chosen": -376.04510498046875, "eval_logps/rejected": -582.1046752929688, "eval_loss": 0.0881563276052475, "eval_rewards/accuracies": 0.9663299918174744, "eval_rewards/chosen": 4.387765407562256, "eval_rewards/margins": 6.646595001220703, "eval_rewards/rejected": -2.2588300704956055, "eval_runtime": 559.0589, "eval_samples_per_second": 16.993, "eval_steps_per_second": 0.531, "step": 1000 }, { "epoch": 0.34, "learning_rate": 3.6494144314318094e-07, "logits/chosen": -1.4677711725234985, "logits/rejected": -1.22615647315979, "logps/chosen": -408.8759765625, "logps/rejected": -469.7860412597656, "loss": 0.1257, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.661646842956543, "rewards/margins": 6.4328932762146, "rewards/rejected": -1.7712465524673462, "step": 1010 }, { "epoch": 0.35, "learning_rate": 3.630525122780506e-07, "logits/chosen": -1.497859239578247, "logits/rejected": -1.1968727111816406, "logps/chosen": -288.6357116699219, "logps/rejected": -523.6803588867188, "loss": 0.103, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.572832107543945, "rewards/margins": 6.243821620941162, "rewards/rejected": -1.670989990234375, "step": 1020 }, { "epoch": 0.35, "learning_rate": 3.6116358141292026e-07, "logits/chosen": -1.4879519939422607, "logits/rejected": -1.2336044311523438, "logps/chosen": -303.6993103027344, "logps/rejected": -508.20123291015625, "loss": 0.128, "rewards/accuracies": 0.9375, "rewards/chosen": 4.822319030761719, "rewards/margins": 6.814971923828125, "rewards/rejected": -1.9926522970199585, "step": 1030 }, { "epoch": 0.35, "learning_rate": 3.592746505477899e-07, "logits/chosen": -1.4755656719207764, "logits/rejected": -1.2433079481124878, "logps/chosen": -495.46337890625, "logps/rejected": -697.58740234375, "loss": 0.0889, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.0532355308532715, "rewards/margins": 6.689316749572754, "rewards/rejected": -2.636080503463745, "step": 1040 }, { "epoch": 0.36, "learning_rate": 3.573857196826596e-07, "logits/chosen": -1.512407660484314, "logits/rejected": -1.203151822090149, "logps/chosen": -480.6717224121094, "logps/rejected": -505.75830078125, "loss": 0.0971, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.397000312805176, "rewards/margins": 6.988126277923584, "rewards/rejected": -2.591125726699829, "step": 1050 }, { "epoch": 0.36, "learning_rate": 3.554967888175293e-07, "logits/chosen": -1.5000110864639282, "logits/rejected": -1.1538686752319336, "logps/chosen": -380.41741943359375, "logps/rejected": -551.3828735351562, "loss": 0.1111, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.297232151031494, "rewards/margins": 6.739757537841797, "rewards/rejected": -2.4425251483917236, "step": 1060 }, { "epoch": 0.36, "learning_rate": 3.5360785795239893e-07, "logits/chosen": -1.4952175617218018, "logits/rejected": -1.2412437200546265, "logps/chosen": -433.78424072265625, "logps/rejected": -458.8251953125, "loss": 0.0803, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.398755073547363, "rewards/margins": 6.876921653747559, "rewards/rejected": -2.478165864944458, "step": 1070 }, { "epoch": 0.37, "learning_rate": 3.517189270872686e-07, "logits/chosen": -1.503846526145935, "logits/rejected": -1.2310945987701416, "logps/chosen": -378.23150634765625, "logps/rejected": -460.6910705566406, "loss": 0.0755, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.319530487060547, "rewards/margins": 6.819169521331787, "rewards/rejected": -2.4996395111083984, "step": 1080 }, { "epoch": 0.37, "learning_rate": 3.4982999622213824e-07, "logits/chosen": -1.489429235458374, "logits/rejected": -1.194059133529663, "logps/chosen": -378.36224365234375, "logps/rejected": -339.7525329589844, "loss": 0.0693, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.176763534545898, "rewards/margins": 6.276023864746094, "rewards/rejected": -2.099259853363037, "step": 1090 }, { "epoch": 0.37, "learning_rate": 3.479410653570079e-07, "logits/chosen": -1.4815757274627686, "logits/rejected": -1.1958659887313843, "logps/chosen": -382.04718017578125, "logps/rejected": -590.7028198242188, "loss": 0.079, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.512447834014893, "rewards/margins": 7.3736677169799805, "rewards/rejected": -2.861220121383667, "step": 1100 }, { "epoch": 0.37, "eval_logits/chosen": -1.4807450771331787, "eval_logits/rejected": -1.153213620185852, "eval_logps/chosen": -375.21636962890625, "eval_logps/rejected": -582.6480712890625, "eval_loss": 0.08400283753871918, "eval_rewards/accuracies": 0.9688552021980286, "eval_rewards/chosen": 4.470638751983643, "eval_rewards/margins": 6.783812046051025, "eval_rewards/rejected": -2.313173294067383, "eval_runtime": 560.0503, "eval_samples_per_second": 16.963, "eval_steps_per_second": 0.53, "step": 1100 }, { "epoch": 0.38, "learning_rate": 3.460521344918776e-07, "logits/chosen": -1.5088837146759033, "logits/rejected": -1.1168277263641357, "logps/chosen": -275.2812194824219, "logps/rejected": -645.93701171875, "loss": 0.102, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.146356105804443, "rewards/margins": 6.36793327331543, "rewards/rejected": -2.2215771675109863, "step": 1110 }, { "epoch": 0.38, "learning_rate": 3.441632036267472e-07, "logits/chosen": -1.50923752784729, "logits/rejected": -1.1946974992752075, "logps/chosen": -330.641357421875, "logps/rejected": -577.8738403320312, "loss": 0.0676, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.274444580078125, "rewards/margins": 6.753907680511475, "rewards/rejected": -2.4794628620147705, "step": 1120 }, { "epoch": 0.38, "learning_rate": 3.422742727616169e-07, "logits/chosen": -1.4846798181533813, "logits/rejected": -1.1590081453323364, "logps/chosen": -407.13201904296875, "logps/rejected": -393.16961669921875, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/chosen": 4.086771011352539, "rewards/margins": 6.169893741607666, "rewards/rejected": -2.0831220149993896, "step": 1130 }, { "epoch": 0.39, "learning_rate": 3.403853418964866e-07, "logits/chosen": -1.4728076457977295, "logits/rejected": -1.1583011150360107, "logps/chosen": -379.17791748046875, "logps/rejected": -501.658935546875, "loss": 0.0813, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.588146209716797, "rewards/margins": 7.1952104568481445, "rewards/rejected": -2.6070632934570312, "step": 1140 }, { "epoch": 0.39, "learning_rate": 3.3849641103135623e-07, "logits/chosen": -1.4747841358184814, "logits/rejected": -1.1721833944320679, "logps/chosen": -417.3246154785156, "logps/rejected": -365.9868469238281, "loss": 0.0597, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 5.068565845489502, "rewards/margins": 7.486462593078613, "rewards/rejected": -2.4178969860076904, "step": 1150 }, { "epoch": 0.39, "learning_rate": 3.3660748016622594e-07, "logits/chosen": -1.4949634075164795, "logits/rejected": -1.1580262184143066, "logps/chosen": -463.01165771484375, "logps/rejected": -490.30926513671875, "loss": 0.0793, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.0085272789001465, "rewards/margins": 6.497877597808838, "rewards/rejected": -2.489349603652954, "step": 1160 }, { "epoch": 0.4, "learning_rate": 3.3471854930109554e-07, "logits/chosen": -1.4875307083129883, "logits/rejected": -1.1987477540969849, "logps/chosen": -300.71185302734375, "logps/rejected": -572.1688232421875, "loss": 0.0727, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.1474151611328125, "rewards/margins": 6.675169467926025, "rewards/rejected": -2.527754306793213, "step": 1170 }, { "epoch": 0.4, "learning_rate": 3.3282961843596525e-07, "logits/chosen": -1.4851741790771484, "logits/rejected": -1.1590709686279297, "logps/chosen": -357.76080322265625, "logps/rejected": -723.2069091796875, "loss": 0.0862, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.243594169616699, "rewards/margins": 6.315056800842285, "rewards/rejected": -2.071462392807007, "step": 1180 }, { "epoch": 0.4, "learning_rate": 3.3094068757083485e-07, "logits/chosen": -1.4589731693267822, "logits/rejected": -1.124894142150879, "logps/chosen": -395.55999755859375, "logps/rejected": -413.896240234375, "loss": 0.095, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.135136127471924, "rewards/margins": 6.630227565765381, "rewards/rejected": -2.495091676712036, "step": 1190 }, { "epoch": 0.41, "learning_rate": 3.2905175670570456e-07, "logits/chosen": -1.494888186454773, "logits/rejected": -1.1850342750549316, "logps/chosen": -464.22430419921875, "logps/rejected": -514.73486328125, "loss": 0.0706, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 3.990201234817505, "rewards/margins": 6.323441028594971, "rewards/rejected": -2.333240032196045, "step": 1200 }, { "epoch": 0.41, "eval_logits/chosen": -1.4885011911392212, "eval_logits/rejected": -1.1666902303695679, "eval_logps/chosen": -375.603759765625, "eval_logps/rejected": -586.021728515625, "eval_loss": 0.07206810265779495, "eval_rewards/accuracies": 0.9722222089767456, "eval_rewards/chosen": 4.431900501251221, "eval_rewards/margins": 7.082433223724365, "eval_rewards/rejected": -2.6505327224731445, "eval_runtime": 560.4254, "eval_samples_per_second": 16.951, "eval_steps_per_second": 0.53, "step": 1200 }, { "epoch": 0.41, "learning_rate": 3.271628258405742e-07, "logits/chosen": -1.5175247192382812, "logits/rejected": -1.1132011413574219, "logps/chosen": -382.6025085449219, "logps/rejected": -333.1087646484375, "loss": 0.0749, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.4511542320251465, "rewards/margins": 6.978426456451416, "rewards/rejected": -2.5272724628448486, "step": 1210 }, { "epoch": 0.41, "learning_rate": 3.252738949754439e-07, "logits/chosen": -1.4874627590179443, "logits/rejected": -1.1758732795715332, "logps/chosen": -365.48291015625, "logps/rejected": -641.5902099609375, "loss": 0.0659, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.6857669353485107, "rewards/margins": 6.348451614379883, "rewards/rejected": -2.662684679031372, "step": 1220 }, { "epoch": 0.42, "learning_rate": 3.233849641103136e-07, "logits/chosen": -1.4980213642120361, "logits/rejected": -1.2111032009124756, "logps/chosen": -301.4989318847656, "logps/rejected": -837.3327026367188, "loss": 0.0621, "rewards/accuracies": 1.0, "rewards/chosen": 4.749375820159912, "rewards/margins": 7.8252434730529785, "rewards/rejected": -3.0758676528930664, "step": 1230 }, { "epoch": 0.42, "learning_rate": 3.214960332451832e-07, "logits/chosen": -1.4798014163970947, "logits/rejected": -1.184417963027954, "logps/chosen": -396.10888671875, "logps/rejected": -611.9696044921875, "loss": 0.0912, "rewards/accuracies": 0.9375, "rewards/chosen": 4.219520568847656, "rewards/margins": 7.1679277420043945, "rewards/rejected": -2.9484081268310547, "step": 1240 }, { "epoch": 0.42, "learning_rate": 3.196071023800529e-07, "logits/chosen": -1.5121700763702393, "logits/rejected": -1.2249577045440674, "logps/chosen": -387.7380065917969, "logps/rejected": -794.2557373046875, "loss": 0.0843, "rewards/accuracies": 0.9375, "rewards/chosen": 4.096358776092529, "rewards/margins": 6.268472194671631, "rewards/rejected": -2.1721131801605225, "step": 1250 }, { "epoch": 0.43, "learning_rate": 3.1771817151492255e-07, "logits/chosen": -1.4872616529464722, "logits/rejected": -1.2021340131759644, "logps/chosen": -396.11920166015625, "logps/rejected": -725.071533203125, "loss": 0.0682, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.439419269561768, "rewards/margins": 7.2906174659729, "rewards/rejected": -2.851198673248291, "step": 1260 }, { "epoch": 0.43, "learning_rate": 3.158292406497922e-07, "logits/chosen": -1.4843103885650635, "logits/rejected": -1.1521885395050049, "logps/chosen": -430.59686279296875, "logps/rejected": -623.4092407226562, "loss": 0.065, "rewards/accuracies": 1.0, "rewards/chosen": 4.504824638366699, "rewards/margins": 7.555941581726074, "rewards/rejected": -3.0511183738708496, "step": 1270 }, { "epoch": 0.44, "learning_rate": 3.1394030978466186e-07, "logits/chosen": -1.5158779621124268, "logits/rejected": -1.161768913269043, "logps/chosen": -340.71282958984375, "logps/rejected": -436.19873046875, "loss": 0.0557, "rewards/accuracies": 1.0, "rewards/chosen": 5.091620445251465, "rewards/margins": 7.653326511383057, "rewards/rejected": -2.5617051124572754, "step": 1280 }, { "epoch": 0.44, "learning_rate": 3.120513789195315e-07, "logits/chosen": -1.5111273527145386, "logits/rejected": -1.1369271278381348, "logps/chosen": -313.18426513671875, "logps/rejected": -592.9158935546875, "loss": 0.0644, "rewards/accuracies": 1.0, "rewards/chosen": 4.0614423751831055, "rewards/margins": 7.267691612243652, "rewards/rejected": -3.2062485218048096, "step": 1290 }, { "epoch": 0.44, "learning_rate": 3.1016244805440117e-07, "logits/chosen": -1.478244423866272, "logits/rejected": -1.2829620838165283, "logps/chosen": -377.1938781738281, "logps/rejected": -613.537841796875, "loss": 0.0705, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.685913562774658, "rewards/margins": 6.318792819976807, "rewards/rejected": -2.6328797340393066, "step": 1300 }, { "epoch": 0.44, "eval_logits/chosen": -1.5001074075698853, "eval_logits/rejected": -1.181748628616333, "eval_logps/chosen": -376.17987060546875, "eval_logps/rejected": -588.2330322265625, "eval_loss": 0.07252340018749237, "eval_rewards/accuracies": 0.9739057421684265, "eval_rewards/chosen": 4.374290466308594, "eval_rewards/margins": 7.245957374572754, "eval_rewards/rejected": -2.87166690826416, "eval_runtime": 559.1781, "eval_samples_per_second": 16.989, "eval_steps_per_second": 0.531, "step": 1300 }, { "epoch": 0.45, "learning_rate": 3.082735171892709e-07, "logits/chosen": -1.5192620754241943, "logits/rejected": -1.2069748640060425, "logps/chosen": -312.7062072753906, "logps/rejected": -482.1133728027344, "loss": 0.0863, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.136082649230957, "rewards/margins": 6.253493309020996, "rewards/rejected": -2.117410182952881, "step": 1310 }, { "epoch": 0.45, "learning_rate": 3.0638458632414054e-07, "logits/chosen": -1.51072096824646, "logits/rejected": -1.2908846139907837, "logps/chosen": -370.267333984375, "logps/rejected": -700.0077514648438, "loss": 0.0773, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.160948276519775, "rewards/margins": 7.014911651611328, "rewards/rejected": -2.8539633750915527, "step": 1320 }, { "epoch": 0.45, "learning_rate": 3.044956554590102e-07, "logits/chosen": -1.4973653554916382, "logits/rejected": -1.1667084693908691, "logps/chosen": -367.26861572265625, "logps/rejected": -421.83868408203125, "loss": 0.0446, "rewards/accuracies": 1.0, "rewards/chosen": 3.954542875289917, "rewards/margins": 6.594731330871582, "rewards/rejected": -2.6401877403259277, "step": 1330 }, { "epoch": 0.46, "learning_rate": 3.0260672459387985e-07, "logits/chosen": -1.5250272750854492, "logits/rejected": -1.1743382215499878, "logps/chosen": -298.74359130859375, "logps/rejected": -514.9177856445312, "loss": 0.0598, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.985522270202637, "rewards/margins": 7.792318820953369, "rewards/rejected": -2.806795597076416, "step": 1340 }, { "epoch": 0.46, "learning_rate": 3.007177937287495e-07, "logits/chosen": -1.509218454360962, "logits/rejected": -1.1687209606170654, "logps/chosen": -313.0664978027344, "logps/rejected": -652.6644897460938, "loss": 0.0348, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.642588138580322, "rewards/margins": 7.6289262771606445, "rewards/rejected": -2.9863381385803223, "step": 1350 }, { "epoch": 0.46, "learning_rate": 2.988288628636192e-07, "logits/chosen": -1.5087230205535889, "logits/rejected": -1.2443337440490723, "logps/chosen": -378.0022888183594, "logps/rejected": -524.3590087890625, "loss": 0.0976, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.988725662231445, "rewards/margins": 7.9680657386779785, "rewards/rejected": -2.979340076446533, "step": 1360 }, { "epoch": 0.47, "learning_rate": 2.969399319984888e-07, "logits/chosen": -1.4828989505767822, "logits/rejected": -1.1986699104309082, "logps/chosen": -329.68743896484375, "logps/rejected": -764.1326293945312, "loss": 0.06, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.3440890312194824, "rewards/margins": 6.329494476318359, "rewards/rejected": -2.985405445098877, "step": 1370 }, { "epoch": 0.47, "learning_rate": 2.950510011333585e-07, "logits/chosen": -1.5152653455734253, "logits/rejected": -1.1745421886444092, "logps/chosen": -352.6541748046875, "logps/rejected": -632.8907470703125, "loss": 0.0712, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.7109479904174805, "rewards/margins": 7.576220512390137, "rewards/rejected": -2.8652729988098145, "step": 1380 }, { "epoch": 0.47, "learning_rate": 2.9316207026822813e-07, "logits/chosen": -1.523559808731079, "logits/rejected": -1.219855546951294, "logps/chosen": -350.2402648925781, "logps/rejected": -644.3220825195312, "loss": 0.0555, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.640657901763916, "rewards/margins": 7.175803184509277, "rewards/rejected": -2.5351455211639404, "step": 1390 }, { "epoch": 0.48, "learning_rate": 2.9127313940309784e-07, "logits/chosen": -1.5056852102279663, "logits/rejected": -1.171008586883545, "logps/chosen": -290.78802490234375, "logps/rejected": -539.4659423828125, "loss": 0.0537, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.571245193481445, "rewards/margins": 8.044679641723633, "rewards/rejected": -3.4734344482421875, "step": 1400 }, { "epoch": 0.48, "eval_logits/chosen": -1.5018603801727295, "eval_logits/rejected": -1.1789315938949585, "eval_logps/chosen": -376.07598876953125, "eval_logps/rejected": -589.1927490234375, "eval_loss": 0.06479610502719879, "eval_rewards/accuracies": 0.9755892157554626, "eval_rewards/chosen": 4.384680271148682, "eval_rewards/margins": 7.3523173332214355, "eval_rewards/rejected": -2.967637062072754, "eval_runtime": 560.4699, "eval_samples_per_second": 16.95, "eval_steps_per_second": 0.53, "step": 1400 }, { "epoch": 0.48, "learning_rate": 2.8938420853796754e-07, "logits/chosen": -1.5160057544708252, "logits/rejected": -1.1476496458053589, "logps/chosen": -411.9048767089844, "logps/rejected": -589.949951171875, "loss": 0.0359, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.407751560211182, "rewards/margins": 7.413491725921631, "rewards/rejected": -3.0057406425476074, "step": 1410 }, { "epoch": 0.48, "learning_rate": 2.8749527767283715e-07, "logits/chosen": -1.4947352409362793, "logits/rejected": -1.2628874778747559, "logps/chosen": -390.4944152832031, "logps/rejected": -644.2883911132812, "loss": 0.0619, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.163218021392822, "rewards/margins": 7.030184268951416, "rewards/rejected": -2.866966485977173, "step": 1420 }, { "epoch": 0.49, "learning_rate": 2.8560634680770686e-07, "logits/chosen": -1.4881634712219238, "logits/rejected": -1.2398184537887573, "logps/chosen": -369.6822814941406, "logps/rejected": -579.9475708007812, "loss": 0.0704, "rewards/accuracies": 1.0, "rewards/chosen": 4.736443996429443, "rewards/margins": 7.590858459472656, "rewards/rejected": -2.854414463043213, "step": 1430 }, { "epoch": 0.49, "learning_rate": 2.8371741594257646e-07, "logits/chosen": -1.4975007772445679, "logits/rejected": -1.2246668338775635, "logps/chosen": -476.669677734375, "logps/rejected": -479.30108642578125, "loss": 0.0951, "rewards/accuracies": 1.0, "rewards/chosen": 4.004213809967041, "rewards/margins": 6.466128349304199, "rewards/rejected": -2.4619147777557373, "step": 1440 }, { "epoch": 0.49, "learning_rate": 2.8182848507744617e-07, "logits/chosen": -1.516287088394165, "logits/rejected": -1.202371597290039, "logps/chosen": -321.565673828125, "logps/rejected": -408.3916015625, "loss": 0.0573, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.261721611022949, "rewards/margins": 7.2457404136657715, "rewards/rejected": -2.9840192794799805, "step": 1450 }, { "epoch": 0.5, "learning_rate": 2.799395542123158e-07, "logits/chosen": -1.4922573566436768, "logits/rejected": -1.2690476179122925, "logps/chosen": -453.80413818359375, "logps/rejected": -637.1600952148438, "loss": 0.0575, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.037074089050293, "rewards/margins": 6.629528045654297, "rewards/rejected": -2.592454433441162, "step": 1460 }, { "epoch": 0.5, "learning_rate": 2.780506233471855e-07, "logits/chosen": -1.5398370027542114, "logits/rejected": -1.190582513809204, "logps/chosen": -303.58465576171875, "logps/rejected": -457.27142333984375, "loss": 0.0475, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 3.979724884033203, "rewards/margins": 6.8525190353393555, "rewards/rejected": -2.8727943897247314, "step": 1470 }, { "epoch": 0.5, "learning_rate": 2.7616169248205513e-07, "logits/chosen": -1.480398178100586, "logits/rejected": -1.1202542781829834, "logps/chosen": -291.8365478515625, "logps/rejected": -525.9415893554688, "loss": 0.0765, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 3.7548556327819824, "rewards/margins": 6.797545433044434, "rewards/rejected": -3.042689800262451, "step": 1480 }, { "epoch": 0.51, "learning_rate": 2.742727616169248e-07, "logits/chosen": -1.5058258771896362, "logits/rejected": -1.2445354461669922, "logps/chosen": -349.40081787109375, "logps/rejected": -353.338623046875, "loss": 0.0673, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.481741428375244, "rewards/margins": 7.538400173187256, "rewards/rejected": -3.056657552719116, "step": 1490 }, { "epoch": 0.51, "learning_rate": 2.723838307517945e-07, "logits/chosen": -1.5099804401397705, "logits/rejected": -1.227176308631897, "logps/chosen": -449.3966369628906, "logps/rejected": -564.8681030273438, "loss": 0.0483, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.375594615936279, "rewards/margins": 7.313169956207275, "rewards/rejected": -2.937574863433838, "step": 1500 }, { "epoch": 0.51, "eval_logits/chosen": -1.5114119052886963, "eval_logits/rejected": -1.1923363208770752, "eval_logps/chosen": -376.16131591796875, "eval_logps/rejected": -591.8114013671875, "eval_loss": 0.060400474816560745, "eval_rewards/accuracies": 0.9797979593276978, "eval_rewards/chosen": 4.3761420249938965, "eval_rewards/margins": 7.605640411376953, "eval_rewards/rejected": -3.2294986248016357, "eval_runtime": 560.6153, "eval_samples_per_second": 16.946, "eval_steps_per_second": 0.53, "step": 1500 }, { "epoch": 0.51, "learning_rate": 2.7049489988666416e-07, "logits/chosen": -1.5114130973815918, "logits/rejected": -1.2126189470291138, "logps/chosen": -430.208984375, "logps/rejected": -501.6602478027344, "loss": 0.075, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.029915809631348, "rewards/margins": 7.003431797027588, "rewards/rejected": -2.973515748977661, "step": 1510 }, { "epoch": 0.52, "learning_rate": 2.686059690215338e-07, "logits/chosen": -1.4975926876068115, "logits/rejected": -1.231730580329895, "logps/chosen": -384.80133056640625, "logps/rejected": -646.3175048828125, "loss": 0.0883, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.063393592834473, "rewards/margins": 7.050488471984863, "rewards/rejected": -2.9870944023132324, "step": 1520 }, { "epoch": 0.52, "learning_rate": 2.6671703815640347e-07, "logits/chosen": -1.5093035697937012, "logits/rejected": -1.1693612337112427, "logps/chosen": -357.9332580566406, "logps/rejected": -419.8724060058594, "loss": 0.0435, "rewards/accuracies": 1.0, "rewards/chosen": 3.968228816986084, "rewards/margins": 7.172101020812988, "rewards/rejected": -3.2038722038269043, "step": 1530 }, { "epoch": 0.52, "learning_rate": 2.648281072912731e-07, "logits/chosen": -1.5199733972549438, "logits/rejected": -1.2490711212158203, "logps/chosen": -319.37335205078125, "logps/rejected": -703.9043579101562, "loss": 0.0709, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.484343528747559, "rewards/margins": 6.947661399841309, "rewards/rejected": -2.46331787109375, "step": 1540 }, { "epoch": 0.53, "learning_rate": 2.629391764261428e-07, "logits/chosen": -1.4934265613555908, "logits/rejected": -1.2080678939819336, "logps/chosen": -397.90325927734375, "logps/rejected": -477.54681396484375, "loss": 0.069, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.9346039295196533, "rewards/margins": 7.143618583679199, "rewards/rejected": -3.2090160846710205, "step": 1550 }, { "epoch": 0.53, "learning_rate": 2.610502455610125e-07, "logits/chosen": -1.4791805744171143, "logits/rejected": -1.201578140258789, "logps/chosen": -478.21563720703125, "logps/rejected": -653.1609497070312, "loss": 0.0623, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.886442184448242, "rewards/margins": 6.926022529602051, "rewards/rejected": -3.0395796298980713, "step": 1560 }, { "epoch": 0.53, "learning_rate": 2.591613146958821e-07, "logits/chosen": -1.5138906240463257, "logits/rejected": -1.243032455444336, "logps/chosen": -350.7288818359375, "logps/rejected": -627.7431640625, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": 4.634993076324463, "rewards/margins": 7.957524299621582, "rewards/rejected": -3.3225319385528564, "step": 1570 }, { "epoch": 0.54, "learning_rate": 2.572723838307518e-07, "logits/chosen": -1.5238749980926514, "logits/rejected": -1.1785722970962524, "logps/chosen": -377.65045166015625, "logps/rejected": -531.2706298828125, "loss": 0.0716, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.06870698928833, "rewards/margins": 7.24503231048584, "rewards/rejected": -3.176325798034668, "step": 1580 }, { "epoch": 0.54, "learning_rate": 2.5538345296562145e-07, "logits/chosen": -1.5166254043579102, "logits/rejected": -1.1256914138793945, "logps/chosen": -375.782470703125, "logps/rejected": -465.26287841796875, "loss": 0.0531, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.288039684295654, "rewards/margins": 7.8462958335876465, "rewards/rejected": -3.5582566261291504, "step": 1590 }, { "epoch": 0.54, "learning_rate": 2.534945221004911e-07, "logits/chosen": -1.4953352212905884, "logits/rejected": -1.2030283212661743, "logps/chosen": -529.4041748046875, "logps/rejected": -491.87384033203125, "loss": 0.0572, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.736111164093018, "rewards/margins": 7.682862281799316, "rewards/rejected": -2.946751356124878, "step": 1600 }, { "epoch": 0.54, "eval_logits/chosen": -1.504213571548462, "eval_logits/rejected": -1.185482144355774, "eval_logps/chosen": -376.66448974609375, "eval_logps/rejected": -592.157470703125, "eval_loss": 0.05805225297808647, "eval_rewards/accuracies": 0.9772727489471436, "eval_rewards/chosen": 4.325828552246094, "eval_rewards/margins": 7.589939117431641, "eval_rewards/rejected": -3.2641103267669678, "eval_runtime": 560.9875, "eval_samples_per_second": 16.934, "eval_steps_per_second": 0.529, "step": 1600 }, { "epoch": 0.55, "learning_rate": 2.516055912353608e-07, "logits/chosen": -1.5023882389068604, "logits/rejected": -1.2200844287872314, "logps/chosen": -385.99200439453125, "logps/rejected": -679.5035400390625, "loss": 0.0551, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.191043376922607, "rewards/margins": 7.510348320007324, "rewards/rejected": -3.3193047046661377, "step": 1610 }, { "epoch": 0.55, "learning_rate": 2.497166603702304e-07, "logits/chosen": -1.5069820880889893, "logits/rejected": -1.2421448230743408, "logps/chosen": -367.49481201171875, "logps/rejected": -656.8528442382812, "loss": 0.0836, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.26437520980835, "rewards/margins": 7.1600213050842285, "rewards/rejected": -2.895646572113037, "step": 1620 }, { "epoch": 0.55, "learning_rate": 2.4782772950510013e-07, "logits/chosen": -1.516225814819336, "logits/rejected": -1.1817419528961182, "logps/chosen": -397.768798828125, "logps/rejected": -495.23443603515625, "loss": 0.0624, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.2399187088012695, "rewards/margins": 7.541648864746094, "rewards/rejected": -3.3017311096191406, "step": 1630 }, { "epoch": 0.56, "learning_rate": 2.459387986399698e-07, "logits/chosen": -1.517730474472046, "logits/rejected": -1.2045361995697021, "logps/chosen": -356.7253112792969, "logps/rejected": -508.641357421875, "loss": 0.0592, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.046034812927246, "rewards/margins": 7.333725929260254, "rewards/rejected": -3.2876906394958496, "step": 1640 }, { "epoch": 0.56, "learning_rate": 2.4404986777483944e-07, "logits/chosen": -1.497016191482544, "logits/rejected": -1.227853536605835, "logps/chosen": -446.447265625, "logps/rejected": -458.6922912597656, "loss": 0.0625, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.6903839111328125, "rewards/margins": 6.824693202972412, "rewards/rejected": -3.1343090534210205, "step": 1650 }, { "epoch": 0.56, "learning_rate": 2.421609369097091e-07, "logits/chosen": -1.514736533164978, "logits/rejected": -1.1953274011611938, "logps/chosen": -395.9438781738281, "logps/rejected": -519.7674560546875, "loss": 0.0504, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.116658687591553, "rewards/margins": 7.169915199279785, "rewards/rejected": -3.0532562732696533, "step": 1660 }, { "epoch": 0.57, "learning_rate": 2.4027200604457875e-07, "logits/chosen": -1.5058282613754272, "logits/rejected": -1.2581437826156616, "logps/chosen": -366.08233642578125, "logps/rejected": -546.3473510742188, "loss": 0.0501, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.698902130126953, "rewards/margins": 7.672143459320068, "rewards/rejected": -2.973240852355957, "step": 1670 }, { "epoch": 0.57, "learning_rate": 2.383830751794484e-07, "logits/chosen": -1.4994776248931885, "logits/rejected": -1.184206485748291, "logps/chosen": -481.41680908203125, "logps/rejected": -767.9908447265625, "loss": 0.0468, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.650491714477539, "rewards/margins": 8.127163887023926, "rewards/rejected": -3.4766716957092285, "step": 1680 }, { "epoch": 0.57, "learning_rate": 2.364941443143181e-07, "logits/chosen": -1.531582236289978, "logits/rejected": -1.2031329870224, "logps/chosen": -321.4193420410156, "logps/rejected": -499.52520751953125, "loss": 0.0414, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.110111236572266, "rewards/margins": 7.465426445007324, "rewards/rejected": -3.355315685272217, "step": 1690 }, { "epoch": 0.58, "learning_rate": 2.3460521344918775e-07, "logits/chosen": -1.5217511653900146, "logits/rejected": -1.2462084293365479, "logps/chosen": -344.8479309082031, "logps/rejected": -528.7171020507812, "loss": 0.066, "rewards/accuracies": 1.0, "rewards/chosen": 4.58670711517334, "rewards/margins": 7.7972092628479, "rewards/rejected": -3.2105019092559814, "step": 1700 }, { "epoch": 0.58, "eval_logits/chosen": -1.5110249519348145, "eval_logits/rejected": -1.1886183023452759, "eval_logps/chosen": -376.65234375, "eval_logps/rejected": -593.3289184570312, "eval_loss": 0.05385367199778557, "eval_rewards/accuracies": 0.9814814925193787, "eval_rewards/chosen": 4.327041149139404, "eval_rewards/margins": 7.708298683166504, "eval_rewards/rejected": -3.3812568187713623, "eval_runtime": 560.6648, "eval_samples_per_second": 16.944, "eval_steps_per_second": 0.53, "step": 1700 }, { "epoch": 0.58, "learning_rate": 2.327162825840574e-07, "logits/chosen": -1.5017929077148438, "logits/rejected": -1.2263991832733154, "logps/chosen": -440.38330078125, "logps/rejected": -768.6172485351562, "loss": 0.0801, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.483176231384277, "rewards/margins": 7.676694393157959, "rewards/rejected": -3.1935179233551025, "step": 1710 }, { "epoch": 0.58, "learning_rate": 2.3082735171892708e-07, "logits/chosen": -1.4979521036148071, "logits/rejected": -1.2336069345474243, "logps/chosen": -368.3187255859375, "logps/rejected": -962.7932739257812, "loss": 0.0557, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.179459095001221, "rewards/margins": 7.952836036682129, "rewards/rejected": -3.7733776569366455, "step": 1720 }, { "epoch": 0.59, "learning_rate": 2.2893842085379674e-07, "logits/chosen": -1.527874231338501, "logits/rejected": -1.1270580291748047, "logps/chosen": -333.58258056640625, "logps/rejected": -464.1814880371094, "loss": 0.0509, "rewards/accuracies": 1.0, "rewards/chosen": 4.677160263061523, "rewards/margins": 8.32009220123291, "rewards/rejected": -3.642932176589966, "step": 1730 }, { "epoch": 0.59, "learning_rate": 2.2704948998866642e-07, "logits/chosen": -1.4980236291885376, "logits/rejected": -1.2070553302764893, "logps/chosen": -518.45703125, "logps/rejected": -483.01422119140625, "loss": 0.0512, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.008620262145996, "rewards/margins": 7.4014739990234375, "rewards/rejected": -3.392852783203125, "step": 1740 }, { "epoch": 0.59, "learning_rate": 2.2516055912353608e-07, "logits/chosen": -1.5251991748809814, "logits/rejected": -1.212727665901184, "logps/chosen": -445.04931640625, "logps/rejected": -477.67364501953125, "loss": 0.0725, "rewards/accuracies": 1.0, "rewards/chosen": 4.699405193328857, "rewards/margins": 8.411505699157715, "rewards/rejected": -3.7121009826660156, "step": 1750 }, { "epoch": 0.6, "learning_rate": 2.2327162825840573e-07, "logits/chosen": -1.5143253803253174, "logits/rejected": -1.2615511417388916, "logps/chosen": -432.19561767578125, "logps/rejected": -536.9600219726562, "loss": 0.058, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.530027866363525, "rewards/margins": 7.708025932312012, "rewards/rejected": -3.1779980659484863, "step": 1760 }, { "epoch": 0.6, "learning_rate": 2.213826973932754e-07, "logits/chosen": -1.5171505212783813, "logits/rejected": -1.230185866355896, "logps/chosen": -315.69781494140625, "logps/rejected": -726.6375122070312, "loss": 0.0496, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.840248107910156, "rewards/margins": 8.333008766174316, "rewards/rejected": -3.4927608966827393, "step": 1770 }, { "epoch": 0.61, "learning_rate": 2.1949376652814505e-07, "logits/chosen": -1.4918253421783447, "logits/rejected": -1.2145707607269287, "logps/chosen": -400.26776123046875, "logps/rejected": -509.4452209472656, "loss": 0.071, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.016195774078369, "rewards/margins": 7.594870567321777, "rewards/rejected": -3.5786757469177246, "step": 1780 }, { "epoch": 0.61, "learning_rate": 2.176048356630147e-07, "logits/chosen": -1.5160037279129028, "logits/rejected": -1.2147983312606812, "logps/chosen": -521.50537109375, "logps/rejected": -494.4219665527344, "loss": 0.0611, "rewards/accuracies": 1.0, "rewards/chosen": 4.44297981262207, "rewards/margins": 7.988565921783447, "rewards/rejected": -3.5455868244171143, "step": 1790 }, { "epoch": 0.61, "learning_rate": 2.157159047978844e-07, "logits/chosen": -1.5425684452056885, "logits/rejected": -1.1855405569076538, "logps/chosen": -327.6552429199219, "logps/rejected": -658.273193359375, "loss": 0.0561, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.459707736968994, "rewards/margins": 7.496172904968262, "rewards/rejected": -3.0364651679992676, "step": 1800 }, { "epoch": 0.61, "eval_logits/chosen": -1.5143883228302002, "eval_logits/rejected": -1.194756269454956, "eval_logps/chosen": -376.0636291503906, "eval_logps/rejected": -593.4963989257812, "eval_loss": 0.05014927685260773, "eval_rewards/accuracies": 0.9797979593276978, "eval_rewards/chosen": 4.385910511016846, "eval_rewards/margins": 7.783912658691406, "eval_rewards/rejected": -3.3980023860931396, "eval_runtime": 560.6319, "eval_samples_per_second": 16.945, "eval_steps_per_second": 0.53, "step": 1800 }, { "epoch": 0.62, "learning_rate": 2.1382697393275407e-07, "logits/chosen": -1.526610016822815, "logits/rejected": -1.1567124128341675, "logps/chosen": -454.001220703125, "logps/rejected": -625.8849487304688, "loss": 0.0381, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.282632827758789, "rewards/margins": 8.028142929077148, "rewards/rejected": -3.7455101013183594, "step": 1810 }, { "epoch": 0.62, "learning_rate": 2.1193804306762372e-07, "logits/chosen": -1.514725923538208, "logits/rejected": -1.2673990726470947, "logps/chosen": -387.0127258300781, "logps/rejected": -848.1672973632812, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": 4.279064655303955, "rewards/margins": 7.790387153625488, "rewards/rejected": -3.5113232135772705, "step": 1820 }, { "epoch": 0.62, "learning_rate": 2.1004911220249338e-07, "logits/chosen": -1.5263025760650635, "logits/rejected": -1.2004420757293701, "logps/chosen": -390.23028564453125, "logps/rejected": -547.3260498046875, "loss": 0.0551, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.023037433624268, "rewards/margins": 6.7541937828063965, "rewards/rejected": -2.731156349182129, "step": 1830 }, { "epoch": 0.63, "learning_rate": 2.0816018133736303e-07, "logits/chosen": -1.5079574584960938, "logits/rejected": -1.1305427551269531, "logps/chosen": -463.9093322753906, "logps/rejected": -681.10205078125, "loss": 0.0428, "rewards/accuracies": 1.0, "rewards/chosen": 4.483296871185303, "rewards/margins": 8.173705101013184, "rewards/rejected": -3.690408229827881, "step": 1840 }, { "epoch": 0.63, "learning_rate": 2.0627125047223271e-07, "logits/chosen": -1.5206629037857056, "logits/rejected": -1.2181518077850342, "logps/chosen": -397.5008850097656, "logps/rejected": -562.5797729492188, "loss": 0.0519, "rewards/accuracies": 1.0, "rewards/chosen": 4.292169094085693, "rewards/margins": 7.382157325744629, "rewards/rejected": -3.089987277984619, "step": 1850 }, { "epoch": 0.63, "learning_rate": 2.0438231960710237e-07, "logits/chosen": -1.5032273530960083, "logits/rejected": -1.276940107345581, "logps/chosen": -366.43060302734375, "logps/rejected": -677.36962890625, "loss": 0.062, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.3607306480407715, "rewards/margins": 7.476487636566162, "rewards/rejected": -3.115757465362549, "step": 1860 }, { "epoch": 0.64, "learning_rate": 2.0249338874197203e-07, "logits/chosen": -1.5287476778030396, "logits/rejected": -1.2484480142593384, "logps/chosen": -378.16302490234375, "logps/rejected": -514.4780883789062, "loss": 0.0657, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.000302314758301, "rewards/margins": 7.317461967468262, "rewards/rejected": -3.317160129547119, "step": 1870 }, { "epoch": 0.64, "learning_rate": 2.0060445787684168e-07, "logits/chosen": -1.5523929595947266, "logits/rejected": -1.2034788131713867, "logps/chosen": -319.57110595703125, "logps/rejected": -540.62158203125, "loss": 0.0468, "rewards/accuracies": 1.0, "rewards/chosen": 4.091488361358643, "rewards/margins": 7.53751277923584, "rewards/rejected": -3.4460244178771973, "step": 1880 }, { "epoch": 0.64, "learning_rate": 1.9871552701171136e-07, "logits/chosen": -1.5344616174697876, "logits/rejected": -1.2502692937850952, "logps/chosen": -417.1963806152344, "logps/rejected": -557.7677001953125, "loss": 0.0625, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.207846641540527, "rewards/margins": 7.392093658447266, "rewards/rejected": -3.184246778488159, "step": 1890 }, { "epoch": 0.65, "learning_rate": 1.9682659614658105e-07, "logits/chosen": -1.5210683345794678, "logits/rejected": -1.2344892024993896, "logps/chosen": -468.1206970214844, "logps/rejected": -585.029541015625, "loss": 0.0538, "rewards/accuracies": 1.0, "rewards/chosen": 4.628952980041504, "rewards/margins": 7.9791693687438965, "rewards/rejected": -3.35021710395813, "step": 1900 }, { "epoch": 0.65, "eval_logits/chosen": -1.5147186517715454, "eval_logits/rejected": -1.203603982925415, "eval_logps/chosen": -375.7136535644531, "eval_logps/rejected": -593.9944458007812, "eval_loss": 0.050368715077638626, "eval_rewards/accuracies": 0.9814814925193787, "eval_rewards/chosen": 4.420912742614746, "eval_rewards/margins": 7.868711471557617, "eval_rewards/rejected": -3.447798728942871, "eval_runtime": 559.9302, "eval_samples_per_second": 16.966, "eval_steps_per_second": 0.53, "step": 1900 }, { "epoch": 0.65, "learning_rate": 1.949376652814507e-07, "logits/chosen": -1.5237205028533936, "logits/rejected": -1.2196900844573975, "logps/chosen": -353.64813232421875, "logps/rejected": -567.98486328125, "loss": 0.0444, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.203667640686035, "rewards/margins": 7.699929237365723, "rewards/rejected": -3.4962615966796875, "step": 1910 }, { "epoch": 0.65, "learning_rate": 1.9304873441632036e-07, "logits/chosen": -1.5383799076080322, "logits/rejected": -1.1897004842758179, "logps/chosen": -304.3857116699219, "logps/rejected": -420.22833251953125, "loss": 0.0371, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.345728874206543, "rewards/margins": 7.751856327056885, "rewards/rejected": -3.4061267375946045, "step": 1920 }, { "epoch": 0.66, "learning_rate": 1.9115980355119001e-07, "logits/chosen": -1.527374029159546, "logits/rejected": -1.2602983713150024, "logps/chosen": -329.572998046875, "logps/rejected": -735.498046875, "loss": 0.0391, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.703977108001709, "rewards/margins": 7.709604740142822, "rewards/rejected": -3.005627393722534, "step": 1930 }, { "epoch": 0.66, "learning_rate": 1.8927087268605967e-07, "logits/chosen": -1.5346765518188477, "logits/rejected": -1.2059959173202515, "logps/chosen": -328.3225402832031, "logps/rejected": -767.6409912109375, "loss": 0.0463, "rewards/accuracies": 1.0, "rewards/chosen": 4.100872993469238, "rewards/margins": 8.146936416625977, "rewards/rejected": -4.04606294631958, "step": 1940 }, { "epoch": 0.66, "learning_rate": 1.8738194182092935e-07, "logits/chosen": -1.494866132736206, "logits/rejected": -1.2239644527435303, "logps/chosen": -555.472900390625, "logps/rejected": -520.2547607421875, "loss": 0.0514, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.442248344421387, "rewards/margins": 7.3626532554626465, "rewards/rejected": -2.920405626296997, "step": 1950 }, { "epoch": 0.67, "learning_rate": 1.85493010955799e-07, "logits/chosen": -1.519852876663208, "logits/rejected": -1.212501883506775, "logps/chosen": -389.075927734375, "logps/rejected": -503.87640380859375, "loss": 0.0575, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.409701347351074, "rewards/margins": 8.056486129760742, "rewards/rejected": -3.6467843055725098, "step": 1960 }, { "epoch": 0.67, "learning_rate": 1.8360408009066866e-07, "logits/chosen": -1.5152437686920166, "logits/rejected": -1.2427327632904053, "logps/chosen": -401.62127685546875, "logps/rejected": -749.9725341796875, "loss": 0.054, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.197791576385498, "rewards/margins": 7.349157810211182, "rewards/rejected": -3.1513662338256836, "step": 1970 }, { "epoch": 0.67, "learning_rate": 1.8171514922553835e-07, "logits/chosen": -1.531203269958496, "logits/rejected": -1.2765506505966187, "logps/chosen": -406.9158630371094, "logps/rejected": -546.1353759765625, "loss": 0.0515, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.106224536895752, "rewards/margins": 7.5454840660095215, "rewards/rejected": -3.439260482788086, "step": 1980 }, { "epoch": 0.68, "learning_rate": 1.79826218360408e-07, "logits/chosen": -1.4749855995178223, "logits/rejected": -1.2416235208511353, "logps/chosen": -434.5870056152344, "logps/rejected": -412.8590393066406, "loss": 0.0475, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.6538729667663574, "rewards/margins": 7.308139801025391, "rewards/rejected": -3.654266357421875, "step": 1990 }, { "epoch": 0.68, "learning_rate": 1.7793728749527768e-07, "logits/chosen": -1.5064570903778076, "logits/rejected": -1.2394144535064697, "logps/chosen": -378.8180236816406, "logps/rejected": -484.3456115722656, "loss": 0.0493, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 3.8156559467315674, "rewards/margins": 7.063841342926025, "rewards/rejected": -3.2481846809387207, "step": 2000 }, { "epoch": 0.68, "eval_logits/chosen": -1.5070686340332031, "eval_logits/rejected": -1.1925033330917358, "eval_logps/chosen": -376.0872802734375, "eval_logps/rejected": -595.3203125, "eval_loss": 0.04720592126250267, "eval_rewards/accuracies": 0.9831649661064148, "eval_rewards/chosen": 4.383547306060791, "eval_rewards/margins": 7.9639458656311035, "eval_rewards/rejected": -3.5803987979888916, "eval_runtime": 558.9461, "eval_samples_per_second": 16.996, "eval_steps_per_second": 0.531, "step": 2000 }, { "epoch": 0.68, "learning_rate": 1.7604835663014734e-07, "logits/chosen": -1.5241343975067139, "logits/rejected": -1.2008111476898193, "logps/chosen": -319.7298278808594, "logps/rejected": -589.1061401367188, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": 4.302298545837402, "rewards/margins": 7.667372703552246, "rewards/rejected": -3.3650736808776855, "step": 2010 }, { "epoch": 0.69, "learning_rate": 1.74159425765017e-07, "logits/chosen": -1.5107629299163818, "logits/rejected": -1.1774795055389404, "logps/chosen": -325.34588623046875, "logps/rejected": -675.2240600585938, "loss": 0.0605, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.494423866271973, "rewards/margins": 8.346731185913086, "rewards/rejected": -3.852306365966797, "step": 2020 }, { "epoch": 0.69, "learning_rate": 1.7227049489988665e-07, "logits/chosen": -1.4806641340255737, "logits/rejected": -1.22501540184021, "logps/chosen": -429.6233825683594, "logps/rejected": -475.925537109375, "loss": 0.0373, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.175747871398926, "rewards/margins": 7.295570373535156, "rewards/rejected": -3.1198229789733887, "step": 2030 }, { "epoch": 0.69, "learning_rate": 1.703815640347563e-07, "logits/chosen": -1.5204100608825684, "logits/rejected": -1.2161033153533936, "logps/chosen": -313.9083557128906, "logps/rejected": -539.1317749023438, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": 4.7474541664123535, "rewards/margins": 8.653862953186035, "rewards/rejected": -3.9064087867736816, "step": 2040 }, { "epoch": 0.7, "learning_rate": 1.6849263316962596e-07, "logits/chosen": -1.5086395740509033, "logits/rejected": -1.1813517808914185, "logps/chosen": -395.3387756347656, "logps/rejected": -496.54052734375, "loss": 0.0482, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 3.5052483081817627, "rewards/margins": 7.271603584289551, "rewards/rejected": -3.766355514526367, "step": 2050 }, { "epoch": 0.7, "learning_rate": 1.6660370230449564e-07, "logits/chosen": -1.5112879276275635, "logits/rejected": -1.1831653118133545, "logps/chosen": -406.0916442871094, "logps/rejected": -510.435302734375, "loss": 0.0525, "rewards/accuracies": 1.0, "rewards/chosen": 4.234797477722168, "rewards/margins": 7.962366580963135, "rewards/rejected": -3.727570056915283, "step": 2060 }, { "epoch": 0.7, "learning_rate": 1.6471477143936533e-07, "logits/chosen": -1.5286850929260254, "logits/rejected": -1.2524337768554688, "logps/chosen": -375.4138488769531, "logps/rejected": -542.0020751953125, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": 4.921536922454834, "rewards/margins": 8.480701446533203, "rewards/rejected": -3.559164047241211, "step": 2070 }, { "epoch": 0.71, "learning_rate": 1.6282584057423498e-07, "logits/chosen": -1.5124969482421875, "logits/rejected": -1.151609182357788, "logps/chosen": -297.8030700683594, "logps/rejected": -420.9918518066406, "loss": 0.0415, "rewards/accuracies": 1.0, "rewards/chosen": 4.653942584991455, "rewards/margins": 8.311192512512207, "rewards/rejected": -3.657250165939331, "step": 2080 }, { "epoch": 0.71, "learning_rate": 1.6093690970910464e-07, "logits/chosen": -1.5191973447799683, "logits/rejected": -1.2087528705596924, "logps/chosen": -406.9920654296875, "logps/rejected": -566.786376953125, "loss": 0.0491, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.626988887786865, "rewards/margins": 8.500511169433594, "rewards/rejected": -3.873521327972412, "step": 2090 }, { "epoch": 0.71, "learning_rate": 1.590479788439743e-07, "logits/chosen": -1.5033385753631592, "logits/rejected": -1.2532203197479248, "logps/chosen": -429.1227111816406, "logps/rejected": -824.8059692382812, "loss": 0.0374, "rewards/accuracies": 1.0, "rewards/chosen": 3.9824090003967285, "rewards/margins": 7.593686103820801, "rewards/rejected": -3.611276149749756, "step": 2100 }, { "epoch": 0.71, "eval_logits/chosen": -1.5165996551513672, "eval_logits/rejected": -1.2020140886306763, "eval_logps/chosen": -376.9510498046875, "eval_logps/rejected": -597.5147094726562, "eval_loss": 0.044869087636470795, "eval_rewards/accuracies": 0.9840067625045776, "eval_rewards/chosen": 4.297166347503662, "eval_rewards/margins": 8.097002983093262, "eval_rewards/rejected": -3.7998366355895996, "eval_runtime": 560.374, "eval_samples_per_second": 16.953, "eval_steps_per_second": 0.53, "step": 2100 }, { "epoch": 0.72, "learning_rate": 1.5715904797884398e-07, "logits/chosen": -1.5076260566711426, "logits/rejected": -1.236230492591858, "logps/chosen": -348.450439453125, "logps/rejected": -499.9619140625, "loss": 0.0848, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.0965800285339355, "rewards/margins": 7.605328559875488, "rewards/rejected": -3.5087478160858154, "step": 2110 }, { "epoch": 0.72, "learning_rate": 1.5527011711371363e-07, "logits/chosen": -1.529317855834961, "logits/rejected": -1.2510004043579102, "logps/chosen": -339.26165771484375, "logps/rejected": -693.9097290039062, "loss": 0.0382, "rewards/accuracies": 1.0, "rewards/chosen": 4.7485551834106445, "rewards/margins": 8.082775115966797, "rewards/rejected": -3.3342204093933105, "step": 2120 }, { "epoch": 0.72, "learning_rate": 1.533811862485833e-07, "logits/chosen": -1.5148240327835083, "logits/rejected": -1.199103593826294, "logps/chosen": -385.8512878417969, "logps/rejected": -639.3033447265625, "loss": 0.0424, "rewards/accuracies": 1.0, "rewards/chosen": 4.181856155395508, "rewards/margins": 7.878443717956543, "rewards/rejected": -3.6965866088867188, "step": 2130 }, { "epoch": 0.73, "learning_rate": 1.5149225538345294e-07, "logits/chosen": -1.5198280811309814, "logits/rejected": -1.1935245990753174, "logps/chosen": -348.9425964355469, "logps/rejected": -459.17041015625, "loss": 0.0394, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.525935173034668, "rewards/margins": 7.647967338562012, "rewards/rejected": -3.122032403945923, "step": 2140 }, { "epoch": 0.73, "learning_rate": 1.496033245183226e-07, "logits/chosen": -1.5460079908370972, "logits/rejected": -1.1968626976013184, "logps/chosen": -328.58251953125, "logps/rejected": -633.262451171875, "loss": 0.0466, "rewards/accuracies": 1.0, "rewards/chosen": 4.540610313415527, "rewards/margins": 8.390274047851562, "rewards/rejected": -3.849663496017456, "step": 2150 }, { "epoch": 0.73, "learning_rate": 1.477143936531923e-07, "logits/chosen": -1.5178780555725098, "logits/rejected": -1.214658498764038, "logps/chosen": -476.41876220703125, "logps/rejected": -339.6917419433594, "loss": 0.0619, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.9794323444366455, "rewards/margins": 7.556540489196777, "rewards/rejected": -3.5771079063415527, "step": 2160 }, { "epoch": 0.74, "learning_rate": 1.4582546278806196e-07, "logits/chosen": -1.520281195640564, "logits/rejected": -1.2420076131820679, "logps/chosen": -379.0602111816406, "logps/rejected": -473.5738830566406, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": 4.220221042633057, "rewards/margins": 7.9052581787109375, "rewards/rejected": -3.685037612915039, "step": 2170 }, { "epoch": 0.74, "learning_rate": 1.4393653192293162e-07, "logits/chosen": -1.5194097757339478, "logits/rejected": -1.2116343975067139, "logps/chosen": -409.2084655761719, "logps/rejected": -558.417236328125, "loss": 0.0416, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.273309230804443, "rewards/margins": 7.955672264099121, "rewards/rejected": -3.6823630332946777, "step": 2180 }, { "epoch": 0.74, "learning_rate": 1.4204760105780127e-07, "logits/chosen": -1.5045769214630127, "logits/rejected": -1.2302758693695068, "logps/chosen": -392.59149169921875, "logps/rejected": -547.0234985351562, "loss": 0.046, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.600020408630371, "rewards/margins": 8.4217529296875, "rewards/rejected": -3.8217320442199707, "step": 2190 }, { "epoch": 0.75, "learning_rate": 1.4015867019267093e-07, "logits/chosen": -1.4690983295440674, "logits/rejected": -1.182051658630371, "logps/chosen": -650.6025390625, "logps/rejected": -511.93035888671875, "loss": 0.0475, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 3.5337417125701904, "rewards/margins": 7.234931945800781, "rewards/rejected": -3.7011895179748535, "step": 2200 }, { "epoch": 0.75, "eval_logits/chosen": -1.517708420753479, "eval_logits/rejected": -1.1991840600967407, "eval_logps/chosen": -376.849365234375, "eval_logps/rejected": -596.0023803710938, "eval_loss": 0.04422454535961151, "eval_rewards/accuracies": 0.9840067625045776, "eval_rewards/chosen": 4.307338714599609, "eval_rewards/margins": 7.955935478210449, "eval_rewards/rejected": -3.648597002029419, "eval_runtime": 561.4605, "eval_samples_per_second": 16.92, "eval_steps_per_second": 0.529, "step": 2200 }, { "epoch": 0.75, "learning_rate": 1.382697393275406e-07, "logits/chosen": -1.4882314205169678, "logits/rejected": -1.2919832468032837, "logps/chosen": -555.08984375, "logps/rejected": -622.5364379882812, "loss": 0.0606, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.7566609382629395, "rewards/margins": 7.16671895980835, "rewards/rejected": -3.4100584983825684, "step": 2210 }, { "epoch": 0.75, "learning_rate": 1.3638080846241027e-07, "logits/chosen": -1.5286386013031006, "logits/rejected": -1.2888312339782715, "logps/chosen": -284.7803039550781, "logps/rejected": -523.2872314453125, "loss": 0.0565, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.4519944190979, "rewards/margins": 7.86702823638916, "rewards/rejected": -3.4150338172912598, "step": 2220 }, { "epoch": 0.76, "learning_rate": 1.3449187759727992e-07, "logits/chosen": -1.527421236038208, "logits/rejected": -1.2638188600540161, "logps/chosen": -299.5697937011719, "logps/rejected": -468.88641357421875, "loss": 0.0418, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.214964389801025, "rewards/margins": 7.762864589691162, "rewards/rejected": -3.547900676727295, "step": 2230 }, { "epoch": 0.76, "learning_rate": 1.3260294673214958e-07, "logits/chosen": -1.5077273845672607, "logits/rejected": -1.2305810451507568, "logps/chosen": -315.0970153808594, "logps/rejected": -543.9883422851562, "loss": 0.0428, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.3515095710754395, "rewards/margins": 8.111806869506836, "rewards/rejected": -3.760296583175659, "step": 2240 }, { "epoch": 0.76, "learning_rate": 1.3071401586701926e-07, "logits/chosen": -1.5324013233184814, "logits/rejected": -1.176598310470581, "logps/chosen": -350.73590087890625, "logps/rejected": -407.3160705566406, "loss": 0.0444, "rewards/accuracies": 1.0, "rewards/chosen": 4.385611534118652, "rewards/margins": 8.232316970825195, "rewards/rejected": -3.8467063903808594, "step": 2250 }, { "epoch": 0.77, "learning_rate": 1.2882508500188894e-07, "logits/chosen": -1.53853440284729, "logits/rejected": -1.1605119705200195, "logps/chosen": -322.6458740234375, "logps/rejected": -577.635009765625, "loss": 0.0612, "rewards/accuracies": 1.0, "rewards/chosen": 4.1477580070495605, "rewards/margins": 8.158384323120117, "rewards/rejected": -4.010627746582031, "step": 2260 }, { "epoch": 0.77, "learning_rate": 1.269361541367586e-07, "logits/chosen": -1.4969431161880493, "logits/rejected": -1.2394483089447021, "logps/chosen": -429.3595275878906, "logps/rejected": -534.6132202148438, "loss": 0.0514, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.626293659210205, "rewards/margins": 8.510354995727539, "rewards/rejected": -3.8840622901916504, "step": 2270 }, { "epoch": 0.77, "learning_rate": 1.2504722327162826e-07, "logits/chosen": -1.5438110828399658, "logits/rejected": -1.3265063762664795, "logps/chosen": -336.172607421875, "logps/rejected": -555.7020263671875, "loss": 0.0643, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.796633243560791, "rewards/margins": 8.188325881958008, "rewards/rejected": -3.391692638397217, "step": 2280 }, { "epoch": 0.78, "learning_rate": 1.231582924064979e-07, "logits/chosen": -1.534425973892212, "logits/rejected": -1.2709665298461914, "logps/chosen": -331.15771484375, "logps/rejected": -599.520751953125, "loss": 0.0743, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.660555362701416, "rewards/margins": 7.926393985748291, "rewards/rejected": -3.265838146209717, "step": 2290 }, { "epoch": 0.78, "learning_rate": 1.2126936154136757e-07, "logits/chosen": -1.5267812013626099, "logits/rejected": -1.2841061353683472, "logps/chosen": -363.42718505859375, "logps/rejected": -614.720947265625, "loss": 0.0407, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.113651752471924, "rewards/margins": 7.806565284729004, "rewards/rejected": -3.69291353225708, "step": 2300 }, { "epoch": 0.78, "eval_logits/chosen": -1.5241639614105225, "eval_logits/rejected": -1.207804799079895, "eval_logps/chosen": -376.9122009277344, "eval_logps/rejected": -597.497802734375, "eval_loss": 0.04077613726258278, "eval_rewards/accuracies": 0.9882155060768127, "eval_rewards/chosen": 4.301055908203125, "eval_rewards/margins": 8.099197387695312, "eval_rewards/rejected": -3.798142194747925, "eval_runtime": 561.0154, "eval_samples_per_second": 16.934, "eval_steps_per_second": 0.529, "step": 2300 }, { "epoch": 0.79, "learning_rate": 1.1938043067623725e-07, "logits/chosen": -1.5032380819320679, "logits/rejected": -1.2603862285614014, "logps/chosen": -468.38519287109375, "logps/rejected": -603.046142578125, "loss": 0.0583, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.9953246116638184, "rewards/margins": 7.673506259918213, "rewards/rejected": -3.6781811714172363, "step": 2310 }, { "epoch": 0.79, "learning_rate": 1.1749149981110692e-07, "logits/chosen": -1.5227621793746948, "logits/rejected": -1.294641137123108, "logps/chosen": -368.20526123046875, "logps/rejected": -515.7191162109375, "loss": 0.0287, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 4.12636661529541, "rewards/margins": 7.043761253356934, "rewards/rejected": -2.9173948764801025, "step": 2320 }, { "epoch": 0.79, "learning_rate": 1.1560256894597657e-07, "logits/chosen": -1.4953995943069458, "logits/rejected": -1.1584880352020264, "logps/chosen": -586.2529296875, "logps/rejected": -516.81591796875, "loss": 0.0496, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.4468929767608643, "rewards/margins": 7.300196647644043, "rewards/rejected": -3.853304386138916, "step": 2330 }, { "epoch": 0.8, "learning_rate": 1.1371363808084623e-07, "logits/chosen": -1.511156439781189, "logits/rejected": -1.1865122318267822, "logps/chosen": -372.8666076660156, "logps/rejected": -906.1705932617188, "loss": 0.032, "rewards/accuracies": 1.0, "rewards/chosen": 4.395539283752441, "rewards/margins": 8.54565715789795, "rewards/rejected": -4.150118827819824, "step": 2340 }, { "epoch": 0.8, "learning_rate": 1.118247072157159e-07, "logits/chosen": -1.5099351406097412, "logits/rejected": -1.203018307685852, "logps/chosen": -348.9670715332031, "logps/rejected": -429.0040588378906, "loss": 0.0443, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.664444923400879, "rewards/margins": 8.25013542175293, "rewards/rejected": -3.5856919288635254, "step": 2350 }, { "epoch": 0.8, "learning_rate": 1.0993577635058557e-07, "logits/chosen": -1.5239416360855103, "logits/rejected": -1.1614112854003906, "logps/chosen": -306.3650817871094, "logps/rejected": -499.98553466796875, "loss": 0.0506, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.466089725494385, "rewards/margins": 8.765016555786133, "rewards/rejected": -4.298927307128906, "step": 2360 }, { "epoch": 0.81, "learning_rate": 1.0804684548545522e-07, "logits/chosen": -1.4973338842391968, "logits/rejected": -1.2353532314300537, "logps/chosen": -363.0428771972656, "logps/rejected": -688.9337158203125, "loss": 0.0569, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.117824554443359, "rewards/margins": 7.465353488922119, "rewards/rejected": -3.3475289344787598, "step": 2370 }, { "epoch": 0.81, "learning_rate": 1.0615791462032489e-07, "logits/chosen": -1.4992105960845947, "logits/rejected": -1.1850025653839111, "logps/chosen": -390.51898193359375, "logps/rejected": -471.59246826171875, "loss": 0.0359, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.290133953094482, "rewards/margins": 8.444581985473633, "rewards/rejected": -4.154448509216309, "step": 2380 }, { "epoch": 0.81, "learning_rate": 1.0426898375519455e-07, "logits/chosen": -1.5164746046066284, "logits/rejected": -1.2176775932312012, "logps/chosen": -386.88580322265625, "logps/rejected": -695.19873046875, "loss": 0.0521, "rewards/accuracies": 1.0, "rewards/chosen": 4.442442893981934, "rewards/margins": 8.452461242675781, "rewards/rejected": -4.010018348693848, "step": 2390 }, { "epoch": 0.82, "learning_rate": 1.0238005289006423e-07, "logits/chosen": -1.5462114810943604, "logits/rejected": -1.2368415594100952, "logps/chosen": -341.13232421875, "logps/rejected": -573.03857421875, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": 4.52898645401001, "rewards/margins": 7.922593593597412, "rewards/rejected": -3.3936073780059814, "step": 2400 }, { "epoch": 0.82, "eval_logits/chosen": -1.5132849216461182, "eval_logits/rejected": -1.2028939723968506, "eval_logps/chosen": -376.4996337890625, "eval_logps/rejected": -596.8302001953125, "eval_loss": 0.03966302424669266, "eval_rewards/accuracies": 0.9882155060768127, "eval_rewards/chosen": 4.342313289642334, "eval_rewards/margins": 8.073698997497559, "eval_rewards/rejected": -3.731386184692383, "eval_runtime": 559.9916, "eval_samples_per_second": 16.965, "eval_steps_per_second": 0.53, "step": 2400 }, { "epoch": 0.82, "learning_rate": 1.0049112202493389e-07, "logits/chosen": -1.5179309844970703, "logits/rejected": -1.190763235092163, "logps/chosen": -344.33892822265625, "logps/rejected": -483.5990295410156, "loss": 0.0401, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 4.271202564239502, "rewards/margins": 7.67493200302124, "rewards/rejected": -3.4037303924560547, "step": 2410 }, { "epoch": 0.82, "learning_rate": 9.860219115980354e-08, "logits/chosen": -1.501859426498413, "logits/rejected": -1.2754067182540894, "logps/chosen": -424.81298828125, "logps/rejected": -500.36175537109375, "loss": 0.0512, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.149248123168945, "rewards/margins": 7.505955696105957, "rewards/rejected": -3.3567073345184326, "step": 2420 }, { "epoch": 0.83, "learning_rate": 9.671326029467321e-08, "logits/chosen": -1.5037426948547363, "logits/rejected": -1.284251093864441, "logps/chosen": -380.0538024902344, "logps/rejected": -624.1697387695312, "loss": 0.054, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.587218284606934, "rewards/margins": 8.128026008605957, "rewards/rejected": -3.5408072471618652, "step": 2430 }, { "epoch": 0.83, "learning_rate": 9.482432942954287e-08, "logits/chosen": -1.5006518363952637, "logits/rejected": -1.248807668685913, "logps/chosen": -396.10888671875, "logps/rejected": -717.84033203125, "loss": 0.0336, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.7980237007141113, "rewards/margins": 7.6149582862854, "rewards/rejected": -3.816934108734131, "step": 2440 }, { "epoch": 0.83, "learning_rate": 9.293539856441255e-08, "logits/chosen": -1.5265161991119385, "logits/rejected": -1.211663007736206, "logps/chosen": -327.98809814453125, "logps/rejected": -630.8543701171875, "loss": 0.0388, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.494044303894043, "rewards/margins": 7.565443515777588, "rewards/rejected": -3.0713984966278076, "step": 2450 }, { "epoch": 0.84, "learning_rate": 9.10464676992822e-08, "logits/chosen": -1.5001169443130493, "logits/rejected": -1.2648394107818604, "logps/chosen": -304.59185791015625, "logps/rejected": -596.6958618164062, "loss": 0.049, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.436002254486084, "rewards/margins": 7.953620910644531, "rewards/rejected": -3.517618179321289, "step": 2460 }, { "epoch": 0.84, "learning_rate": 8.915753683415186e-08, "logits/chosen": -1.5231083631515503, "logits/rejected": -1.221145749092102, "logps/chosen": -391.0690612792969, "logps/rejected": -552.14208984375, "loss": 0.0472, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.607051849365234, "rewards/margins": 8.291250228881836, "rewards/rejected": -3.6841976642608643, "step": 2470 }, { "epoch": 0.84, "learning_rate": 8.726860596902153e-08, "logits/chosen": -1.4747313261032104, "logits/rejected": -1.266494631767273, "logps/chosen": -392.5005798339844, "logps/rejected": -545.3479614257812, "loss": 0.0409, "rewards/accuracies": 1.0, "rewards/chosen": 4.316119194030762, "rewards/margins": 7.8105034828186035, "rewards/rejected": -3.494384288787842, "step": 2480 }, { "epoch": 0.85, "learning_rate": 8.53796751038912e-08, "logits/chosen": -1.5253236293792725, "logits/rejected": -1.2925410270690918, "logps/chosen": -378.5972900390625, "logps/rejected": -446.5828552246094, "loss": 0.04, "rewards/accuracies": 1.0, "rewards/chosen": 4.2583818435668945, "rewards/margins": 7.742008209228516, "rewards/rejected": -3.4836268424987793, "step": 2490 }, { "epoch": 0.85, "learning_rate": 8.349074423876085e-08, "logits/chosen": -1.4970636367797852, "logits/rejected": -1.173380970954895, "logps/chosen": -453.74249267578125, "logps/rejected": -443.0990295410156, "loss": 0.0504, "rewards/accuracies": 1.0, "rewards/chosen": 4.253788948059082, "rewards/margins": 8.546039581298828, "rewards/rejected": -4.292250156402588, "step": 2500 }, { "epoch": 0.85, "eval_logits/chosen": -1.5187809467315674, "eval_logits/rejected": -1.2023788690567017, "eval_logps/chosen": -376.1911926269531, "eval_logps/rejected": -597.20654296875, "eval_loss": 0.03895895555615425, "eval_rewards/accuracies": 0.9856902360916138, "eval_rewards/chosen": 4.3731584548950195, "eval_rewards/margins": 8.142176628112793, "eval_rewards/rejected": -3.7690184116363525, "eval_runtime": 560.5484, "eval_samples_per_second": 16.948, "eval_steps_per_second": 0.53, "step": 2500 }, { "epoch": 0.85, "learning_rate": 8.160181337363052e-08, "logits/chosen": -1.5079090595245361, "logits/rejected": -1.218942403793335, "logps/chosen": -400.8089904785156, "logps/rejected": -563.3532104492188, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": 4.257572650909424, "rewards/margins": 7.628092288970947, "rewards/rejected": -3.3705201148986816, "step": 2510 }, { "epoch": 0.86, "learning_rate": 7.971288250850018e-08, "logits/chosen": -1.5301258563995361, "logits/rejected": -1.2807587385177612, "logps/chosen": -315.27337646484375, "logps/rejected": -816.6290893554688, "loss": 0.0417, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.818511009216309, "rewards/margins": 8.743834495544434, "rewards/rejected": -3.925323486328125, "step": 2520 }, { "epoch": 0.86, "learning_rate": 7.782395164336985e-08, "logits/chosen": -1.5073165893554688, "logits/rejected": -1.2790597677230835, "logps/chosen": -434.49267578125, "logps/rejected": -593.9933471679688, "loss": 0.0468, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.8378801345825195, "rewards/margins": 7.976640224456787, "rewards/rejected": -3.1387598514556885, "step": 2530 }, { "epoch": 0.86, "learning_rate": 7.593502077823952e-08, "logits/chosen": -1.503222942352295, "logits/rejected": -1.2196729183197021, "logps/chosen": -313.89654541015625, "logps/rejected": -462.62078857421875, "loss": 0.0303, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.41795015335083, "rewards/margins": 7.8886284828186035, "rewards/rejected": -3.4706790447235107, "step": 2540 }, { "epoch": 0.87, "learning_rate": 7.404608991310917e-08, "logits/chosen": -1.5052754878997803, "logits/rejected": -1.2386561632156372, "logps/chosen": -372.96209716796875, "logps/rejected": -585.10791015625, "loss": 0.0583, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.49515438079834, "rewards/margins": 8.383849143981934, "rewards/rejected": -3.888695478439331, "step": 2550 }, { "epoch": 0.87, "learning_rate": 7.215715904797884e-08, "logits/chosen": -1.525687336921692, "logits/rejected": -1.1762199401855469, "logps/chosen": -317.4388122558594, "logps/rejected": -654.0875854492188, "loss": 0.0313, "rewards/accuracies": 1.0, "rewards/chosen": 4.393351078033447, "rewards/margins": 8.096270561218262, "rewards/rejected": -3.7029201984405518, "step": 2560 }, { "epoch": 0.87, "learning_rate": 7.02682281828485e-08, "logits/chosen": -1.520235300064087, "logits/rejected": -1.1894917488098145, "logps/chosen": -396.32781982421875, "logps/rejected": -622.02783203125, "loss": 0.061, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.850032329559326, "rewards/margins": 7.790719032287598, "rewards/rejected": -3.940687656402588, "step": 2570 }, { "epoch": 0.88, "learning_rate": 6.837929731771818e-08, "logits/chosen": -1.5212862491607666, "logits/rejected": -1.2499208450317383, "logps/chosen": -376.70684814453125, "logps/rejected": -389.7237854003906, "loss": 0.0437, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.127648830413818, "rewards/margins": 7.433091640472412, "rewards/rejected": -3.305443286895752, "step": 2580 }, { "epoch": 0.88, "learning_rate": 6.649036645258783e-08, "logits/chosen": -1.517173171043396, "logits/rejected": -1.1914104223251343, "logps/chosen": -444.6543884277344, "logps/rejected": -365.63848876953125, "loss": 0.047, "rewards/accuracies": 1.0, "rewards/chosen": 4.557802677154541, "rewards/margins": 8.270492553710938, "rewards/rejected": -3.7126896381378174, "step": 2590 }, { "epoch": 0.88, "learning_rate": 6.460143558745749e-08, "logits/chosen": -1.5273593664169312, "logits/rejected": -1.1451303958892822, "logps/chosen": -325.8979797363281, "logps/rejected": -582.1531982421875, "loss": 0.0402, "rewards/accuracies": 1.0, "rewards/chosen": 4.247693061828613, "rewards/margins": 8.258806228637695, "rewards/rejected": -4.011113166809082, "step": 2600 }, { "epoch": 0.88, "eval_logits/chosen": -1.5157567262649536, "eval_logits/rejected": -1.1977304220199585, "eval_logps/chosen": -376.56494140625, "eval_logps/rejected": -597.8150024414062, "eval_loss": 0.037716832011938095, "eval_rewards/accuracies": 0.9865319728851318, "eval_rewards/chosen": 4.33577823638916, "eval_rewards/margins": 8.16563892364502, "eval_rewards/rejected": -3.8298606872558594, "eval_runtime": 560.9406, "eval_samples_per_second": 16.936, "eval_steps_per_second": 0.529, "step": 2600 }, { "epoch": 0.89, "learning_rate": 6.271250472232716e-08, "logits/chosen": -1.5143485069274902, "logits/rejected": -1.3070073127746582, "logps/chosen": -389.19464111328125, "logps/rejected": -658.4906616210938, "loss": 0.0493, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.09715461730957, "rewards/margins": 7.326831817626953, "rewards/rejected": -3.229677200317383, "step": 2610 }, { "epoch": 0.89, "learning_rate": 6.082357385719683e-08, "logits/chosen": -1.5194426774978638, "logits/rejected": -1.338354229927063, "logps/chosen": -472.4857482910156, "logps/rejected": -520.1239013671875, "loss": 0.0476, "rewards/accuracies": 1.0, "rewards/chosen": 4.095303535461426, "rewards/margins": 7.620616912841797, "rewards/rejected": -3.525313138961792, "step": 2620 }, { "epoch": 0.89, "learning_rate": 5.893464299206649e-08, "logits/chosen": -1.5334171056747437, "logits/rejected": -1.2070086002349854, "logps/chosen": -345.03106689453125, "logps/rejected": -601.37060546875, "loss": 0.0327, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.165444374084473, "rewards/margins": 7.924208641052246, "rewards/rejected": -3.7587637901306152, "step": 2630 }, { "epoch": 0.9, "learning_rate": 5.704571212693615e-08, "logits/chosen": -1.5042918920516968, "logits/rejected": -1.2301785945892334, "logps/chosen": -357.8497314453125, "logps/rejected": -494.36944580078125, "loss": 0.0457, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 3.3990867137908936, "rewards/margins": 6.8430280685424805, "rewards/rejected": -3.443941593170166, "step": 2640 }, { "epoch": 0.9, "learning_rate": 5.5156781261805816e-08, "logits/chosen": -1.5347890853881836, "logits/rejected": -1.2450568675994873, "logps/chosen": -290.27862548828125, "logps/rejected": -452.07562255859375, "loss": 0.0342, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.202007293701172, "rewards/margins": 7.855565547943115, "rewards/rejected": -3.6535582542419434, "step": 2650 }, { "epoch": 0.9, "learning_rate": 5.326785039667548e-08, "logits/chosen": -1.5297716856002808, "logits/rejected": -1.1971065998077393, "logps/chosen": -395.7029724121094, "logps/rejected": -416.86810302734375, "loss": 0.0418, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.438989639282227, "rewards/margins": 8.2936372756958, "rewards/rejected": -3.854647159576416, "step": 2660 }, { "epoch": 0.91, "learning_rate": 5.137891953154514e-08, "logits/chosen": -1.5386561155319214, "logits/rejected": -1.268654704093933, "logps/chosen": -334.1921691894531, "logps/rejected": -703.80908203125, "loss": 0.0391, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 5.104978084564209, "rewards/margins": 8.777002334594727, "rewards/rejected": -3.6720242500305176, "step": 2670 }, { "epoch": 0.91, "learning_rate": 4.948998866641481e-08, "logits/chosen": -1.5136685371398926, "logits/rejected": -1.2055470943450928, "logps/chosen": -393.09869384765625, "logps/rejected": -756.4305419921875, "loss": 0.0371, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.0205078125, "rewards/margins": 8.307108879089355, "rewards/rejected": -4.2866010665893555, "step": 2680 }, { "epoch": 0.91, "learning_rate": 4.760105780128447e-08, "logits/chosen": -1.5168330669403076, "logits/rejected": -1.1547820568084717, "logps/chosen": -425.5591735839844, "logps/rejected": -774.383544921875, "loss": 0.0347, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.134261131286621, "rewards/margins": 8.559054374694824, "rewards/rejected": -4.424793720245361, "step": 2690 }, { "epoch": 0.92, "learning_rate": 4.5712126936154134e-08, "logits/chosen": -1.5314137935638428, "logits/rejected": -1.2539647817611694, "logps/chosen": -260.22467041015625, "logps/rejected": -489.43182373046875, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 4.627935409545898, "rewards/margins": 8.63275146484375, "rewards/rejected": -4.00481653213501, "step": 2700 }, { "epoch": 0.92, "eval_logits/chosen": -1.5139025449752808, "eval_logits/rejected": -1.2032972574234009, "eval_logps/chosen": -376.6385803222656, "eval_logps/rejected": -597.8989868164062, "eval_loss": 0.039693351835012436, "eval_rewards/accuracies": 0.9890572428703308, "eval_rewards/chosen": 4.328419208526611, "eval_rewards/margins": 8.166685104370117, "eval_rewards/rejected": -3.838265895843506, "eval_runtime": 558.6431, "eval_samples_per_second": 17.005, "eval_steps_per_second": 0.532, "step": 2700 }, { "epoch": 0.92, "learning_rate": 4.3823196071023796e-08, "logits/chosen": -1.5291705131530762, "logits/rejected": -1.2298305034637451, "logps/chosen": -323.2745056152344, "logps/rejected": -548.8238525390625, "loss": 0.0424, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.2751970291137695, "rewards/margins": 8.485635757446289, "rewards/rejected": -4.2104387283325195, "step": 2710 }, { "epoch": 0.92, "learning_rate": 4.1934265205893465e-08, "logits/chosen": -1.503124475479126, "logits/rejected": -1.2478026151657104, "logps/chosen": -480.8046875, "logps/rejected": -691.4312133789062, "loss": 0.0365, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.418412208557129, "rewards/margins": 8.399371147155762, "rewards/rejected": -3.980959415435791, "step": 2720 }, { "epoch": 0.93, "learning_rate": 4.004533434076313e-08, "logits/chosen": -1.5199840068817139, "logits/rejected": -1.2516810894012451, "logps/chosen": -318.7385559082031, "logps/rejected": -670.9912719726562, "loss": 0.0378, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.218963623046875, "rewards/margins": 7.438061714172363, "rewards/rejected": -3.21909761428833, "step": 2730 }, { "epoch": 0.93, "learning_rate": 3.815640347563279e-08, "logits/chosen": -1.4905387163162231, "logits/rejected": -1.271468162536621, "logps/chosen": -392.78240966796875, "logps/rejected": -901.37548828125, "loss": 0.0291, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 3.8263752460479736, "rewards/margins": 7.711263179779053, "rewards/rejected": -3.8848884105682373, "step": 2740 }, { "epoch": 0.93, "learning_rate": 3.626747261050245e-08, "logits/chosen": -1.5208760499954224, "logits/rejected": -1.2187827825546265, "logps/chosen": -320.1753845214844, "logps/rejected": -740.0049438476562, "loss": 0.0378, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.149036884307861, "rewards/margins": 8.07148551940918, "rewards/rejected": -3.9224491119384766, "step": 2750 }, { "epoch": 0.94, "learning_rate": 3.4378541745372115e-08, "logits/chosen": -1.5195062160491943, "logits/rejected": -1.2230064868927002, "logps/chosen": -427.1581115722656, "logps/rejected": -645.3033447265625, "loss": 0.0417, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.26482629776001, "rewards/margins": 8.393260955810547, "rewards/rejected": -4.128435134887695, "step": 2760 }, { "epoch": 0.94, "learning_rate": 3.2489610880241784e-08, "logits/chosen": -1.525866150856018, "logits/rejected": -1.2397754192352295, "logps/chosen": -306.03778076171875, "logps/rejected": -587.6482543945312, "loss": 0.0332, "rewards/accuracies": 1.0, "rewards/chosen": 4.998318672180176, "rewards/margins": 8.605379104614258, "rewards/rejected": -3.6070590019226074, "step": 2770 }, { "epoch": 0.94, "learning_rate": 3.0600680015111446e-08, "logits/chosen": -1.516852617263794, "logits/rejected": -1.1658298969268799, "logps/chosen": -312.1228942871094, "logps/rejected": -346.4730224609375, "loss": 0.0445, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.162473678588867, "rewards/margins": 8.016084671020508, "rewards/rejected": -3.853611469268799, "step": 2780 }, { "epoch": 0.95, "learning_rate": 2.871174914998111e-08, "logits/chosen": -1.524436354637146, "logits/rejected": -1.1867830753326416, "logps/chosen": -326.0414123535156, "logps/rejected": -485.95166015625, "loss": 0.0375, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.58970308303833, "rewards/margins": 8.010432243347168, "rewards/rejected": -3.420729875564575, "step": 2790 }, { "epoch": 0.95, "learning_rate": 2.682281828485077e-08, "logits/chosen": -1.520810842514038, "logits/rejected": -1.1647284030914307, "logps/chosen": -333.65008544921875, "logps/rejected": -827.0930786132812, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/chosen": 4.306121349334717, "rewards/margins": 8.471829414367676, "rewards/rejected": -4.165709018707275, "step": 2800 }, { "epoch": 0.95, "eval_logits/chosen": -1.5196325778961182, "eval_logits/rejected": -1.2036585807800293, "eval_logps/chosen": -376.93743896484375, "eval_logps/rejected": -598.0059204101562, "eval_loss": 0.0383492186665535, "eval_rewards/accuracies": 0.9856902360916138, "eval_rewards/chosen": 4.298529148101807, "eval_rewards/margins": 8.147479057312012, "eval_rewards/rejected": -3.848950147628784, "eval_runtime": 558.5691, "eval_samples_per_second": 17.008, "eval_steps_per_second": 0.532, "step": 2800 }, { "epoch": 0.96, "learning_rate": 2.4933887419720436e-08, "logits/chosen": -1.5408833026885986, "logits/rejected": -1.2266209125518799, "logps/chosen": -303.4648132324219, "logps/rejected": -399.6147155761719, "loss": 0.0527, "rewards/accuracies": 1.0, "rewards/chosen": 4.5173773765563965, "rewards/margins": 8.121113777160645, "rewards/rejected": -3.6037354469299316, "step": 2810 }, { "epoch": 0.96, "learning_rate": 2.30449565545901e-08, "logits/chosen": -1.5411306619644165, "logits/rejected": -1.2380434274673462, "logps/chosen": -333.23699951171875, "logps/rejected": -425.56365966796875, "loss": 0.0324, "rewards/accuracies": 1.0, "rewards/chosen": 4.0661211013793945, "rewards/margins": 7.654998779296875, "rewards/rejected": -3.588876724243164, "step": 2820 }, { "epoch": 0.96, "learning_rate": 2.1156025689459764e-08, "logits/chosen": -1.5202014446258545, "logits/rejected": -1.249887466430664, "logps/chosen": -415.40435791015625, "logps/rejected": -705.8953857421875, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": 4.5467915534973145, "rewards/margins": 8.021774291992188, "rewards/rejected": -3.474982500076294, "step": 2830 }, { "epoch": 0.97, "learning_rate": 1.926709482432943e-08, "logits/chosen": -1.5168020725250244, "logits/rejected": -1.1783835887908936, "logps/chosen": -368.14337158203125, "logps/rejected": -583.8258056640625, "loss": 0.0346, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.783400058746338, "rewards/margins": 8.449257850646973, "rewards/rejected": -3.6658573150634766, "step": 2840 }, { "epoch": 0.97, "learning_rate": 1.7378163959199092e-08, "logits/chosen": -1.5222840309143066, "logits/rejected": -1.2157676219940186, "logps/chosen": -377.74932861328125, "logps/rejected": -491.2120056152344, "loss": 0.0368, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.284539222717285, "rewards/margins": 8.006728172302246, "rewards/rejected": -3.7221896648406982, "step": 2850 }, { "epoch": 0.97, "learning_rate": 1.5489233094068758e-08, "logits/chosen": -1.5340877771377563, "logits/rejected": -1.2179819345474243, "logps/chosen": -341.79974365234375, "logps/rejected": -472.01611328125, "loss": 0.0316, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.628954887390137, "rewards/margins": 8.175573348999023, "rewards/rejected": -3.546616315841675, "step": 2860 }, { "epoch": 0.98, "learning_rate": 1.3600302228938419e-08, "logits/chosen": -1.5086033344268799, "logits/rejected": -1.2281320095062256, "logps/chosen": -449.17156982421875, "logps/rejected": -668.3966064453125, "loss": 0.0593, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 4.049460411071777, "rewards/margins": 7.482198238372803, "rewards/rejected": -3.4327378273010254, "step": 2870 }, { "epoch": 0.98, "learning_rate": 1.1711371363808084e-08, "logits/chosen": -1.533616304397583, "logits/rejected": -1.2686628103256226, "logps/chosen": -389.5843200683594, "logps/rejected": -875.8938598632812, "loss": 0.0386, "rewards/accuracies": 1.0, "rewards/chosen": 4.17772102355957, "rewards/margins": 8.47387981414795, "rewards/rejected": -4.296158790588379, "step": 2880 }, { "epoch": 0.98, "learning_rate": 9.822440498677748e-09, "logits/chosen": -1.5015050172805786, "logits/rejected": -1.2158093452453613, "logps/chosen": -532.0431518554688, "logps/rejected": -657.7987670898438, "loss": 0.037, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 3.858596086502075, "rewards/margins": 7.628444671630859, "rewards/rejected": -3.769848346710205, "step": 2890 }, { "epoch": 0.99, "learning_rate": 7.933509633547412e-09, "logits/chosen": -1.5313704013824463, "logits/rejected": -1.2360942363739014, "logps/chosen": -339.72064208984375, "logps/rejected": -532.837158203125, "loss": 0.0365, "rewards/accuracies": 1.0, "rewards/chosen": 4.4045090675354, "rewards/margins": 8.255632400512695, "rewards/rejected": -3.851123332977295, "step": 2900 }, { "epoch": 0.99, "eval_logits/chosen": -1.5156338214874268, "eval_logits/rejected": -1.1996530294418335, "eval_logps/chosen": -376.83685302734375, "eval_logps/rejected": -597.8652954101562, "eval_loss": 0.03792084753513336, "eval_rewards/accuracies": 0.9873737096786499, "eval_rewards/chosen": 4.308588981628418, "eval_rewards/margins": 8.143476486206055, "eval_rewards/rejected": -3.8348886966705322, "eval_runtime": 561.0021, "eval_samples_per_second": 16.934, "eval_steps_per_second": 0.529, "step": 2900 }, { "epoch": 0.99, "learning_rate": 6.044578768417076e-09, "logits/chosen": -1.5284096002578735, "logits/rejected": -1.2779542207717896, "logps/chosen": -324.67864990234375, "logps/rejected": -710.6837768554688, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": 3.9714195728302, "rewards/margins": 7.755260467529297, "rewards/rejected": -3.7838408946990967, "step": 2910 }, { "epoch": 0.99, "learning_rate": 4.15564790328674e-09, "logits/chosen": -1.5134741067886353, "logits/rejected": -1.2182211875915527, "logps/chosen": -336.8125915527344, "logps/rejected": -749.4577026367188, "loss": 0.028, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.103480815887451, "rewards/margins": 7.714101314544678, "rewards/rejected": -3.6106209754943848, "step": 2920 }, { "epoch": 1.0, "learning_rate": 2.2667170381564033e-09, "logits/chosen": -1.5044059753417969, "logits/rejected": -1.248711347579956, "logps/chosen": -317.59130859375, "logps/rejected": -411.42706298828125, "loss": 0.038, "rewards/accuracies": 1.0, "rewards/chosen": 3.9643845558166504, "rewards/margins": 7.606657981872559, "rewards/rejected": -3.64227294921875, "step": 2930 }, { "epoch": 1.0, "learning_rate": 3.7778617302606723e-10, "logits/chosen": -1.4944725036621094, "logits/rejected": -1.208418369293213, "logps/chosen": -398.1802673339844, "logps/rejected": -528.9981689453125, "loss": 0.0369, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 4.150896072387695, "rewards/margins": 7.481464385986328, "rewards/rejected": -3.3305678367614746, "step": 2940 }, { "epoch": 1.0, "step": 2942, "total_flos": 0.0, "train_loss": 0.11494619559330763, "train_runtime": 36321.6775, "train_samples_per_second": 5.184, "train_steps_per_second": 0.081 } ], "logging_steps": 10, "max_steps": 2942, "num_train_epochs": 1, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }