|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 4168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002399232245681382, |
|
"grad_norm": 4.7795046499064915, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -0.7570170760154724, |
|
"logits/rejected": -0.7606267929077148, |
|
"logps/chosen": -147.62075805664062, |
|
"logps/rejected": -139.63986206054688, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0023992322456813818, |
|
"grad_norm": 5.0729607226586175, |
|
"learning_rate": 1.199040767386091e-08, |
|
"logits/chosen": -0.7337759137153625, |
|
"logits/rejected": -0.8291671872138977, |
|
"logps/chosen": -372.46026611328125, |
|
"logps/rejected": -298.1966247558594, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.0005176405538804829, |
|
"rewards/margins": 0.00119220616761595, |
|
"rewards/rejected": -0.0006745656137354672, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0047984644913627635, |
|
"grad_norm": 5.162819171123915, |
|
"learning_rate": 2.398081534772182e-08, |
|
"logits/chosen": -0.7522455453872681, |
|
"logits/rejected": -0.7984375953674316, |
|
"logps/chosen": -240.97720336914062, |
|
"logps/rejected": -211.13278198242188, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.00016315083485096693, |
|
"rewards/margins": 0.0002667726075742394, |
|
"rewards/rejected": -0.00010362181637901813, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007197696737044146, |
|
"grad_norm": 4.74337539097734, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -0.7967968583106995, |
|
"logits/rejected": -0.8497036099433899, |
|
"logps/chosen": -252.3729705810547, |
|
"logps/rejected": -261.5249328613281, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0017296618316322565, |
|
"rewards/margins": -0.0007607643492519855, |
|
"rewards/rejected": -0.0009688973659649491, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009596928982725527, |
|
"grad_norm": 4.907565292084559, |
|
"learning_rate": 4.796163069544364e-08, |
|
"logits/chosen": -0.8299921154975891, |
|
"logits/rejected": -0.883353054523468, |
|
"logps/chosen": -268.02789306640625, |
|
"logps/rejected": -251.27548217773438, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 6.413871597032994e-05, |
|
"rewards/margins": 0.0001428989926353097, |
|
"rewards/rejected": -7.876028394093737e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01199616122840691, |
|
"grad_norm": 5.291199439758451, |
|
"learning_rate": 5.995203836930455e-08, |
|
"logits/chosen": -0.7905577421188354, |
|
"logits/rejected": -0.8132292032241821, |
|
"logps/chosen": -273.465087890625, |
|
"logps/rejected": -236.5275421142578, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.00024220789782702923, |
|
"rewards/margins": -0.0008948832983151078, |
|
"rewards/rejected": 0.0006526754004880786, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014395393474088292, |
|
"grad_norm": 5.461587897395331, |
|
"learning_rate": 7.194244604316546e-08, |
|
"logits/chosen": -0.8055087924003601, |
|
"logits/rejected": -0.7774447202682495, |
|
"logps/chosen": -279.95806884765625, |
|
"logps/rejected": -260.2548828125, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0018558722222223878, |
|
"rewards/margins": -0.0015968760708346963, |
|
"rewards/rejected": -0.00025899597676470876, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016794625719769675, |
|
"grad_norm": 4.789491285565402, |
|
"learning_rate": 8.393285371702638e-08, |
|
"logits/chosen": -0.6775354743003845, |
|
"logits/rejected": -0.6865079998970032, |
|
"logps/chosen": -284.73492431640625, |
|
"logps/rejected": -268.7757263183594, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0004096252378076315, |
|
"rewards/margins": -0.001045037293806672, |
|
"rewards/rejected": 0.0006354121142067015, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 5.22044457856631, |
|
"learning_rate": 9.592326139088728e-08, |
|
"logits/chosen": -0.7918148040771484, |
|
"logits/rejected": -0.6770384907722473, |
|
"logps/chosen": -193.21511840820312, |
|
"logps/rejected": -248.8389892578125, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0001543355901958421, |
|
"rewards/margins": 0.0017128061736002564, |
|
"rewards/rejected": -0.0015584708889946342, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021593090211132437, |
|
"grad_norm": 5.0135506626659065, |
|
"learning_rate": 1.0791366906474819e-07, |
|
"logits/chosen": -0.860626220703125, |
|
"logits/rejected": -0.9020501971244812, |
|
"logps/chosen": -332.2583312988281, |
|
"logps/rejected": -287.39312744140625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -5.847834836458787e-05, |
|
"rewards/margins": 0.00035616609966382384, |
|
"rewards/rejected": -0.00041464445530436933, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02399232245681382, |
|
"grad_norm": 5.263212106273348, |
|
"learning_rate": 1.199040767386091e-07, |
|
"logits/chosen": -0.7230492234230042, |
|
"logits/rejected": -0.6537036895751953, |
|
"logps/chosen": -265.91143798828125, |
|
"logps/rejected": -282.36163330078125, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.00011261176405241713, |
|
"rewards/margins": 0.00016119341307785362, |
|
"rewards/rejected": -0.00027380516985431314, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026391554702495202, |
|
"grad_norm": 4.559210233997684, |
|
"learning_rate": 1.3189448441247004e-07, |
|
"logits/chosen": -0.8084124326705933, |
|
"logits/rejected": -0.838187038898468, |
|
"logps/chosen": -228.7566375732422, |
|
"logps/rejected": -229.68017578125, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.00025232377811335027, |
|
"rewards/margins": 0.0021261090878397226, |
|
"rewards/rejected": -0.002378433011472225, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028790786948176585, |
|
"grad_norm": 4.987801425382141, |
|
"learning_rate": 1.4388489208633092e-07, |
|
"logits/chosen": -0.7631937265396118, |
|
"logits/rejected": -0.8265846967697144, |
|
"logps/chosen": -287.75518798828125, |
|
"logps/rejected": -274.0089111328125, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0009368563769385219, |
|
"rewards/margins": 0.0011595094110816717, |
|
"rewards/rejected": -0.0020963659044355154, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.031190019193857964, |
|
"grad_norm": 4.539938061260808, |
|
"learning_rate": 1.5587529976019183e-07, |
|
"logits/chosen": -0.8044384717941284, |
|
"logits/rejected": -0.7853862643241882, |
|
"logps/chosen": -208.2334747314453, |
|
"logps/rejected": -308.89727783203125, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0008903613197617233, |
|
"rewards/margins": 0.003986647818237543, |
|
"rewards/rejected": -0.004877009429037571, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03358925143953935, |
|
"grad_norm": 4.86277843753179, |
|
"learning_rate": 1.6786570743405277e-07, |
|
"logits/chosen": -0.59493488073349, |
|
"logits/rejected": -0.6423755288124084, |
|
"logps/chosen": -296.8682861328125, |
|
"logps/rejected": -286.8326721191406, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0003669736906886101, |
|
"rewards/margins": 0.006478472147136927, |
|
"rewards/rejected": -0.006845445372164249, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03598848368522073, |
|
"grad_norm": 5.140685164747867, |
|
"learning_rate": 1.7985611510791365e-07, |
|
"logits/chosen": -0.7444754838943481, |
|
"logits/rejected": -0.7507014870643616, |
|
"logps/chosen": -225.19686889648438, |
|
"logps/rejected": -223.80783081054688, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0018180795013904572, |
|
"rewards/margins": 0.006333982106298208, |
|
"rewards/rejected": -0.008152060210704803, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 5.210882534550865, |
|
"learning_rate": 1.9184652278177456e-07, |
|
"logits/chosen": -0.6324438452720642, |
|
"logits/rejected": -0.6643397212028503, |
|
"logps/chosen": -304.49700927734375, |
|
"logps/rejected": -235.69424438476562, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0027639209292829037, |
|
"rewards/margins": 0.008128685876727104, |
|
"rewards/rejected": -0.010892605409026146, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.040786948176583494, |
|
"grad_norm": 4.6747728490323635, |
|
"learning_rate": 2.038369304556355e-07, |
|
"logits/chosen": -0.6203088164329529, |
|
"logits/rejected": -0.6542561650276184, |
|
"logps/chosen": -337.5506591796875, |
|
"logps/rejected": -324.4564208984375, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0021853535436093807, |
|
"rewards/margins": 0.010294707491993904, |
|
"rewards/rejected": -0.012480061501264572, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04318618042226487, |
|
"grad_norm": 5.220785522583115, |
|
"learning_rate": 2.1582733812949638e-07, |
|
"logits/chosen": -0.8055013418197632, |
|
"logits/rejected": -0.8089167475700378, |
|
"logps/chosen": -238.187744140625, |
|
"logps/rejected": -231.9917449951172, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.004614435136318207, |
|
"rewards/margins": 0.018629100173711777, |
|
"rewards/rejected": -0.023243537172675133, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04558541266794626, |
|
"grad_norm": 5.9389729833935325, |
|
"learning_rate": 2.278177458033573e-07, |
|
"logits/chosen": -0.6565154790878296, |
|
"logits/rejected": -0.7033632397651672, |
|
"logps/chosen": -318.5955505371094, |
|
"logps/rejected": -258.2650451660156, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.005404843017458916, |
|
"rewards/margins": 0.0013396486174315214, |
|
"rewards/rejected": -0.0067444914020597935, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04798464491362764, |
|
"grad_norm": 4.680422689180021, |
|
"learning_rate": 2.398081534772182e-07, |
|
"logits/chosen": -0.7583560347557068, |
|
"logits/rejected": -0.7128076553344727, |
|
"logps/chosen": -315.15093994140625, |
|
"logps/rejected": -300.1588134765625, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0010681712301447988, |
|
"rewards/margins": 0.02815566025674343, |
|
"rewards/rejected": -0.029223833233118057, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05038387715930902, |
|
"grad_norm": 4.9609467030055505, |
|
"learning_rate": 2.517985611510791e-07, |
|
"logits/chosen": -0.7415071725845337, |
|
"logits/rejected": -0.7684369683265686, |
|
"logps/chosen": -241.5952911376953, |
|
"logps/rejected": -265.6112976074219, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.006404706742614508, |
|
"rewards/margins": 0.019062474370002747, |
|
"rewards/rejected": -0.02546718157827854, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.052783109404990404, |
|
"grad_norm": 4.801299794937751, |
|
"learning_rate": 2.637889688249401e-07, |
|
"logits/chosen": -0.6760513186454773, |
|
"logits/rejected": -0.6948543190956116, |
|
"logps/chosen": -311.57281494140625, |
|
"logps/rejected": -320.0372619628906, |
|
"loss": 0.6852, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.010622961446642876, |
|
"rewards/margins": 0.013131847605109215, |
|
"rewards/rejected": -0.02375480905175209, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05518234165067178, |
|
"grad_norm": 5.226626430555928, |
|
"learning_rate": 2.7577937649880093e-07, |
|
"logits/chosen": -0.6577489376068115, |
|
"logits/rejected": -0.5907109975814819, |
|
"logps/chosen": -237.6232147216797, |
|
"logps/rejected": -272.1260986328125, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.017997443675994873, |
|
"rewards/margins": 0.015421544201672077, |
|
"rewards/rejected": -0.033418990671634674, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 5.781159666071322, |
|
"learning_rate": 2.8776978417266184e-07, |
|
"logits/chosen": -0.6846515536308289, |
|
"logits/rejected": -0.7044352293014526, |
|
"logps/chosen": -290.9536437988281, |
|
"logps/rejected": -245.861083984375, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.00569694209843874, |
|
"rewards/margins": 0.03246745467185974, |
|
"rewards/rejected": -0.038164399564266205, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05998080614203455, |
|
"grad_norm": 5.24934898048967, |
|
"learning_rate": 2.997601918465228e-07, |
|
"logits/chosen": -0.6797415614128113, |
|
"logits/rejected": -0.6883940696716309, |
|
"logps/chosen": -233.06948852539062, |
|
"logps/rejected": -224.38671875, |
|
"loss": 0.6791, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0182084571570158, |
|
"rewards/margins": 0.017979206517338753, |
|
"rewards/rejected": -0.03618766739964485, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06238003838771593, |
|
"grad_norm": 4.992367199190442, |
|
"learning_rate": 3.1175059952038366e-07, |
|
"logits/chosen": -0.7312067747116089, |
|
"logits/rejected": -0.6521024703979492, |
|
"logps/chosen": -271.38824462890625, |
|
"logps/rejected": -275.63653564453125, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.02519085630774498, |
|
"rewards/margins": 0.04188547283411026, |
|
"rewards/rejected": -0.06707633286714554, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0647792706333973, |
|
"grad_norm": 5.17440656592256, |
|
"learning_rate": 3.2374100719424457e-07, |
|
"logits/chosen": -0.6548904180526733, |
|
"logits/rejected": -0.7951699495315552, |
|
"logps/chosen": -282.63873291015625, |
|
"logps/rejected": -225.5301971435547, |
|
"loss": 0.6735, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.02142253890633583, |
|
"rewards/margins": 0.012398405000567436, |
|
"rewards/rejected": -0.033820949494838715, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0671785028790787, |
|
"grad_norm": 5.277296067925045, |
|
"learning_rate": 3.3573141486810554e-07, |
|
"logits/chosen": -0.7172076106071472, |
|
"logits/rejected": -0.6975899934768677, |
|
"logps/chosen": -296.5246887207031, |
|
"logps/rejected": -288.5944519042969, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.018556680530309677, |
|
"rewards/margins": 0.05493815615773201, |
|
"rewards/rejected": -0.07349482923746109, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06957773512476008, |
|
"grad_norm": 4.781163975921149, |
|
"learning_rate": 3.477218225419664e-07, |
|
"logits/chosen": -0.6228010654449463, |
|
"logits/rejected": -0.5732084512710571, |
|
"logps/chosen": -288.7183532714844, |
|
"logps/rejected": -263.90496826171875, |
|
"loss": 0.6668, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.021407321095466614, |
|
"rewards/margins": 0.04037974029779434, |
|
"rewards/rejected": -0.061787061393260956, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07197696737044146, |
|
"grad_norm": 5.423349784612897, |
|
"learning_rate": 3.597122302158273e-07, |
|
"logits/chosen": -0.7558736801147461, |
|
"logits/rejected": -0.7680533528327942, |
|
"logps/chosen": -264.5168762207031, |
|
"logps/rejected": -289.5086669921875, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04736015945672989, |
|
"rewards/margins": 0.04246506839990616, |
|
"rewards/rejected": -0.08982523530721664, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07437619961612284, |
|
"grad_norm": 5.058813842555934, |
|
"learning_rate": 3.7170263788968827e-07, |
|
"logits/chosen": -0.6308411359786987, |
|
"logits/rejected": -0.6718063950538635, |
|
"logps/chosen": -270.92779541015625, |
|
"logps/rejected": -247.31472778320312, |
|
"loss": 0.6721, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.044375158846378326, |
|
"rewards/margins": 0.06773975491523743, |
|
"rewards/rejected": -0.11211492121219635, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 4.805850462678792, |
|
"learning_rate": 3.836930455635491e-07, |
|
"logits/chosen": -0.7107186913490295, |
|
"logits/rejected": -0.729761004447937, |
|
"logps/chosen": -273.9128723144531, |
|
"logps/rejected": -246.8804168701172, |
|
"loss": 0.665, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.05261250585317612, |
|
"rewards/margins": 0.05193439871072769, |
|
"rewards/rejected": -0.10454690456390381, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07917466410748561, |
|
"grad_norm": 4.995629742536248, |
|
"learning_rate": 3.9568345323741003e-07, |
|
"logits/chosen": -0.6203581094741821, |
|
"logits/rejected": -0.5326763391494751, |
|
"logps/chosen": -261.4637756347656, |
|
"logps/rejected": -304.72015380859375, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.09535319358110428, |
|
"rewards/margins": 0.07053264230489731, |
|
"rewards/rejected": -0.1658858358860016, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08157389635316699, |
|
"grad_norm": 4.930244515909295, |
|
"learning_rate": 4.07673860911271e-07, |
|
"logits/chosen": -0.5719800591468811, |
|
"logits/rejected": -0.576286792755127, |
|
"logps/chosen": -240.22964477539062, |
|
"logps/rejected": -272.12823486328125, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.05066823214292526, |
|
"rewards/margins": 0.10919372737407684, |
|
"rewards/rejected": -0.1598619669675827, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08397312859884837, |
|
"grad_norm": 5.465735130601733, |
|
"learning_rate": 4.1966426858513185e-07, |
|
"logits/chosen": -0.7455052137374878, |
|
"logits/rejected": -0.7238417863845825, |
|
"logps/chosen": -306.32135009765625, |
|
"logps/rejected": -303.4272766113281, |
|
"loss": 0.661, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10594834387302399, |
|
"rewards/margins": 0.061454661190509796, |
|
"rewards/rejected": -0.16740299761295319, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08637236084452975, |
|
"grad_norm": 5.474448104756549, |
|
"learning_rate": 4.3165467625899276e-07, |
|
"logits/chosen": -0.6257158517837524, |
|
"logits/rejected": -0.6943267583847046, |
|
"logps/chosen": -263.67681884765625, |
|
"logps/rejected": -227.45315551757812, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10281842947006226, |
|
"rewards/margins": 0.06660200655460358, |
|
"rewards/rejected": -0.16942045092582703, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08877159309021113, |
|
"grad_norm": 5.5768966423340265, |
|
"learning_rate": 4.436450839328537e-07, |
|
"logits/chosen": -0.6406761407852173, |
|
"logits/rejected": -0.6252005696296692, |
|
"logps/chosen": -254.68783569335938, |
|
"logps/rejected": -282.90228271484375, |
|
"loss": 0.6526, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13191966712474823, |
|
"rewards/margins": 0.1186646968126297, |
|
"rewards/rejected": -0.25058436393737793, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09117082533589252, |
|
"grad_norm": 4.831375981831261, |
|
"learning_rate": 4.556354916067146e-07, |
|
"logits/chosen": -0.6866484880447388, |
|
"logits/rejected": -0.633335530757904, |
|
"logps/chosen": -240.5032196044922, |
|
"logps/rejected": -261.5648498535156, |
|
"loss": 0.6392, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.10136518627405167, |
|
"rewards/margins": 0.14594808220863342, |
|
"rewards/rejected": -0.2473132610321045, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0935700575815739, |
|
"grad_norm": 5.472261214448905, |
|
"learning_rate": 4.676258992805755e-07, |
|
"logits/chosen": -0.564619243144989, |
|
"logits/rejected": -0.5628719329833984, |
|
"logps/chosen": -286.52899169921875, |
|
"logps/rejected": -262.25323486328125, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.15715694427490234, |
|
"rewards/margins": 0.07161404192447662, |
|
"rewards/rejected": -0.22877097129821777, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 5.347372222074903, |
|
"learning_rate": 4.796163069544364e-07, |
|
"logits/chosen": -0.6121346354484558, |
|
"logits/rejected": -0.6788171529769897, |
|
"logps/chosen": -266.82220458984375, |
|
"logps/rejected": -270.36724853515625, |
|
"loss": 0.6421, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1677761971950531, |
|
"rewards/margins": 0.17789766192436218, |
|
"rewards/rejected": -0.3456738591194153, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09836852207293666, |
|
"grad_norm": 5.340397422554379, |
|
"learning_rate": 4.916067146282974e-07, |
|
"logits/chosen": -0.6538274884223938, |
|
"logits/rejected": -0.6384181976318359, |
|
"logps/chosen": -270.4076843261719, |
|
"logps/rejected": -313.26544189453125, |
|
"loss": 0.6258, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.1868169903755188, |
|
"rewards/margins": 0.1523607075214386, |
|
"rewards/rejected": -0.3391777276992798, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10076775431861804, |
|
"grad_norm": 5.46694192660825, |
|
"learning_rate": 4.999992108529978e-07, |
|
"logits/chosen": -0.585496723651886, |
|
"logits/rejected": -0.5861325263977051, |
|
"logps/chosen": -343.7078857421875, |
|
"logps/rejected": -326.8913269042969, |
|
"loss": 0.6332, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.23270578682422638, |
|
"rewards/margins": 0.18326610326766968, |
|
"rewards/rejected": -0.41597190499305725, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10316698656429943, |
|
"grad_norm": 5.835218906444854, |
|
"learning_rate": 4.999851817115532e-07, |
|
"logits/chosen": -0.7467209100723267, |
|
"logits/rejected": -0.6660154461860657, |
|
"logps/chosen": -266.6215515136719, |
|
"logps/rejected": -292.622802734375, |
|
"loss": 0.6348, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.22090856730937958, |
|
"rewards/margins": 0.29071298241615295, |
|
"rewards/rejected": -0.5116215348243713, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10556621880998081, |
|
"grad_norm": 5.421070680650146, |
|
"learning_rate": 4.999536171027889e-07, |
|
"logits/chosen": -0.5800718069076538, |
|
"logits/rejected": -0.6239966154098511, |
|
"logps/chosen": -310.7662353515625, |
|
"logps/rejected": -315.01727294921875, |
|
"loss": 0.6241, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.30067840218544006, |
|
"rewards/margins": 0.14732010662555695, |
|
"rewards/rejected": -0.4479985237121582, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10796545105566219, |
|
"grad_norm": 6.239774740521307, |
|
"learning_rate": 4.999045192408369e-07, |
|
"logits/chosen": -0.5860768556594849, |
|
"logits/rejected": -0.5695077180862427, |
|
"logps/chosen": -266.48162841796875, |
|
"logps/rejected": -264.7235107421875, |
|
"loss": 0.6248, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3346542716026306, |
|
"rewards/margins": 0.12011837959289551, |
|
"rewards/rejected": -0.4547726511955261, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11036468330134357, |
|
"grad_norm": 6.029927081598656, |
|
"learning_rate": 4.998378915697171e-07, |
|
"logits/chosen": -0.6232699155807495, |
|
"logits/rejected": -0.6357511878013611, |
|
"logps/chosen": -296.1357116699219, |
|
"logps/rejected": -316.9218444824219, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.28828713297843933, |
|
"rewards/margins": 0.2776426374912262, |
|
"rewards/rejected": -0.5659297704696655, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11276391554702495, |
|
"grad_norm": 5.6604945230319625, |
|
"learning_rate": 4.997537387630958e-07, |
|
"logits/chosen": -0.5975057482719421, |
|
"logits/rejected": -0.6152404546737671, |
|
"logps/chosen": -235.7269744873047, |
|
"logps/rejected": -265.6780090332031, |
|
"loss": 0.5969, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2783138155937195, |
|
"rewards/margins": 0.27526405453681946, |
|
"rewards/rejected": -0.5535778403282166, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 6.693255407381944, |
|
"learning_rate": 4.996520667239582e-07, |
|
"logits/chosen": -0.7205396294593811, |
|
"logits/rejected": -0.6762118339538574, |
|
"logps/chosen": -265.1204833984375, |
|
"logps/rejected": -349.3818054199219, |
|
"loss": 0.6045, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3487080931663513, |
|
"rewards/margins": 0.32969897985458374, |
|
"rewards/rejected": -0.6784070730209351, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11756238003838772, |
|
"grad_norm": 6.713235956519299, |
|
"learning_rate": 4.995328825841939e-07, |
|
"logits/chosen": -0.5713664293289185, |
|
"logits/rejected": -0.553689181804657, |
|
"logps/chosen": -246.03598022460938, |
|
"logps/rejected": -300.48797607421875, |
|
"loss": 0.5929, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.35947948694229126, |
|
"rewards/margins": 0.5059111714363098, |
|
"rewards/rejected": -0.8653906583786011, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1199616122840691, |
|
"grad_norm": 6.608316940106426, |
|
"learning_rate": 4.993961947040967e-07, |
|
"logits/chosen": -0.5497556924819946, |
|
"logits/rejected": -0.5836997032165527, |
|
"logps/chosen": -330.4705505371094, |
|
"logps/rejected": -313.43511962890625, |
|
"loss": 0.6159, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5261253118515015, |
|
"rewards/margins": 0.1777980625629425, |
|
"rewards/rejected": -0.7039234042167664, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12236084452975048, |
|
"grad_norm": 6.046831260369165, |
|
"learning_rate": 4.992420126717784e-07, |
|
"logits/chosen": -0.5983260869979858, |
|
"logits/rejected": -0.5698710680007935, |
|
"logps/chosen": -275.0146789550781, |
|
"logps/rejected": -332.9678039550781, |
|
"loss": 0.5951, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3116165101528168, |
|
"rewards/margins": 0.5498673319816589, |
|
"rewards/rejected": -0.8614838719367981, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12476007677543186, |
|
"grad_norm": 7.348411835249425, |
|
"learning_rate": 4.990703473024958e-07, |
|
"logits/chosen": -0.5138384103775024, |
|
"logits/rejected": -0.5233681201934814, |
|
"logps/chosen": -332.6457214355469, |
|
"logps/rejected": -353.2841796875, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5376469492912292, |
|
"rewards/margins": 0.28140488266944885, |
|
"rewards/rejected": -0.8190518617630005, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12715930902111325, |
|
"grad_norm": 7.638245462015975, |
|
"learning_rate": 4.98881210637893e-07, |
|
"logits/chosen": -0.6331408619880676, |
|
"logits/rejected": -0.5843578577041626, |
|
"logps/chosen": -253.6781005859375, |
|
"logps/rejected": -326.880126953125, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.44268542528152466, |
|
"rewards/margins": 0.36151397228240967, |
|
"rewards/rejected": -0.8041993379592896, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1295585412667946, |
|
"grad_norm": 5.437673001258359, |
|
"learning_rate": 4.986746159451553e-07, |
|
"logits/chosen": -0.5594351887702942, |
|
"logits/rejected": -0.5495598912239075, |
|
"logps/chosen": -293.2762756347656, |
|
"logps/rejected": -318.1295166015625, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.37716802954673767, |
|
"rewards/margins": 0.341768354177475, |
|
"rewards/rejected": -0.7189363837242126, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.131957773512476, |
|
"grad_norm": 5.462303995641746, |
|
"learning_rate": 4.984505777160795e-07, |
|
"logits/chosen": -0.499727725982666, |
|
"logits/rejected": -0.5212177038192749, |
|
"logps/chosen": -356.2312927246094, |
|
"logps/rejected": -389.5164794921875, |
|
"loss": 0.6123, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5117184519767761, |
|
"rewards/margins": 0.31264665722846985, |
|
"rewards/rejected": -0.8243652582168579, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 6.563474963984782, |
|
"learning_rate": 4.982091116660574e-07, |
|
"logits/chosen": -0.6962921023368835, |
|
"logits/rejected": -0.7188105583190918, |
|
"logps/chosen": -248.63955688476562, |
|
"logps/rejected": -241.68521118164062, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5236629843711853, |
|
"rewards/margins": 0.1912154257297516, |
|
"rewards/rejected": -0.7148783802986145, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13675623800383876, |
|
"grad_norm": 7.806740806376691, |
|
"learning_rate": 4.979502347329732e-07, |
|
"logits/chosen": -0.531540036201477, |
|
"logits/rejected": -0.5084825754165649, |
|
"logps/chosen": -359.24871826171875, |
|
"logps/rejected": -422.90606689453125, |
|
"loss": 0.6116, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6582085490226746, |
|
"rewards/margins": 0.3848935067653656, |
|
"rewards/rejected": -1.0431021451950073, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13915547024952016, |
|
"grad_norm": 8.43363363786984, |
|
"learning_rate": 4.976739650760151e-07, |
|
"logits/chosen": -0.6741775274276733, |
|
"logits/rejected": -0.6754758358001709, |
|
"logps/chosen": -318.57708740234375, |
|
"logps/rejected": -327.61669921875, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5984118580818176, |
|
"rewards/margins": 0.26561444997787476, |
|
"rewards/rejected": -0.8640263676643372, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14155470249520152, |
|
"grad_norm": 8.305901855269768, |
|
"learning_rate": 4.97380322074402e-07, |
|
"logits/chosen": -0.5298658609390259, |
|
"logits/rejected": -0.5446540117263794, |
|
"logps/chosen": -281.94403076171875, |
|
"logps/rejected": -312.7386474609375, |
|
"loss": 0.6096, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6375762820243835, |
|
"rewards/margins": 0.3014167249202728, |
|
"rewards/rejected": -0.9389930963516235, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14395393474088292, |
|
"grad_norm": 6.932480462761205, |
|
"learning_rate": 4.970693263260237e-07, |
|
"logits/chosen": -0.6071778535842896, |
|
"logits/rejected": -0.6448204517364502, |
|
"logps/chosen": -334.240478515625, |
|
"logps/rejected": -351.1219177246094, |
|
"loss": 0.6118, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.5482883453369141, |
|
"rewards/margins": 0.4772118031978607, |
|
"rewards/rejected": -1.0255001783370972, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1463531669865643, |
|
"grad_norm": 7.988462606950694, |
|
"learning_rate": 4.967409996459966e-07, |
|
"logits/chosen": -0.6251802444458008, |
|
"logits/rejected": -0.6402324438095093, |
|
"logps/chosen": -337.7198486328125, |
|
"logps/rejected": -350.0640563964844, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5524300932884216, |
|
"rewards/margins": 0.395042359828949, |
|
"rewards/rejected": -0.9474723935127258, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14875239923224567, |
|
"grad_norm": 6.807256286481946, |
|
"learning_rate": 4.963953650651326e-07, |
|
"logits/chosen": -0.5530382394790649, |
|
"logits/rejected": -0.5686200857162476, |
|
"logps/chosen": -411.8724670410156, |
|
"logps/rejected": -351.87310791015625, |
|
"loss": 0.5886, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6493805646896362, |
|
"rewards/margins": 0.2970190942287445, |
|
"rewards/rejected": -0.9463998079299927, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15115163147792707, |
|
"grad_norm": 6.66103506943935, |
|
"learning_rate": 4.960324468283248e-07, |
|
"logits/chosen": -0.7188149690628052, |
|
"logits/rejected": -0.7464720010757446, |
|
"logps/chosen": -291.5167541503906, |
|
"logps/rejected": -326.8276672363281, |
|
"loss": 0.5613, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6833261251449585, |
|
"rewards/margins": 0.3538287281990051, |
|
"rewards/rejected": -1.0371549129486084, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 7.385295168977064, |
|
"learning_rate": 4.956522703928451e-07, |
|
"logits/chosen": -0.7066096663475037, |
|
"logits/rejected": -0.6415206789970398, |
|
"logps/chosen": -306.70269775390625, |
|
"logps/rejected": -343.36724853515625, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7203932404518127, |
|
"rewards/margins": 0.3621978163719177, |
|
"rewards/rejected": -1.082590937614441, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15595009596928983, |
|
"grad_norm": 9.96633693590498, |
|
"learning_rate": 4.952548624265606e-07, |
|
"logits/chosen": -0.5960395336151123, |
|
"logits/rejected": -0.5949414372444153, |
|
"logps/chosen": -375.6698303222656, |
|
"logps/rejected": -379.85662841796875, |
|
"loss": 0.6024, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9392830729484558, |
|
"rewards/margins": 0.2532385289669037, |
|
"rewards/rejected": -1.1925214529037476, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15834932821497122, |
|
"grad_norm": 7.433452911452113, |
|
"learning_rate": 4.948402508060607e-07, |
|
"logits/chosen": -0.6946985721588135, |
|
"logits/rejected": -0.714805006980896, |
|
"logps/chosen": -307.7847900390625, |
|
"logps/rejected": -352.8008117675781, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7437388896942139, |
|
"rewards/margins": 0.5146031379699707, |
|
"rewards/rejected": -1.2583420276641846, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16074856046065258, |
|
"grad_norm": 8.260908471494544, |
|
"learning_rate": 4.944084646147038e-07, |
|
"logits/chosen": -0.6452184319496155, |
|
"logits/rejected": -0.6763893961906433, |
|
"logps/chosen": -397.63629150390625, |
|
"logps/rejected": -404.8137512207031, |
|
"loss": 0.6115, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8227389454841614, |
|
"rewards/margins": 0.29742100834846497, |
|
"rewards/rejected": -1.1201599836349487, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16314779270633398, |
|
"grad_norm": 8.266084905632274, |
|
"learning_rate": 4.939595341405754e-07, |
|
"logits/chosen": -0.7573758363723755, |
|
"logits/rejected": -0.780553936958313, |
|
"logps/chosen": -331.1211853027344, |
|
"logps/rejected": -373.88775634765625, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7678017616271973, |
|
"rewards/margins": 0.4917237162590027, |
|
"rewards/rejected": -1.2595255374908447, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16554702495201534, |
|
"grad_norm": 7.625957912581509, |
|
"learning_rate": 4.93493490874365e-07, |
|
"logits/chosen": -0.6451135873794556, |
|
"logits/rejected": -0.6591531038284302, |
|
"logps/chosen": -325.8547058105469, |
|
"logps/rejected": -366.7559814453125, |
|
"loss": 0.5467, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.821052074432373, |
|
"rewards/margins": 0.33492714166641235, |
|
"rewards/rejected": -1.1559793949127197, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16794625719769674, |
|
"grad_norm": 10.17202506828973, |
|
"learning_rate": 4.93010367507156e-07, |
|
"logits/chosen": -0.7482548356056213, |
|
"logits/rejected": -0.7481337189674377, |
|
"logps/chosen": -276.33294677734375, |
|
"logps/rejected": -313.0743103027344, |
|
"loss": 0.5607, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.673913300037384, |
|
"rewards/margins": 0.6004130244255066, |
|
"rewards/rejected": -1.2743263244628906, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17034548944337813, |
|
"grad_norm": 9.522996639673643, |
|
"learning_rate": 4.925101979281332e-07, |
|
"logits/chosen": -0.6719304919242859, |
|
"logits/rejected": -0.7497730851173401, |
|
"logps/chosen": -369.2864685058594, |
|
"logps/rejected": -377.1894226074219, |
|
"loss": 0.5774, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7069920301437378, |
|
"rewards/margins": 0.6167112588882446, |
|
"rewards/rejected": -1.3237032890319824, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 7.878026358158539, |
|
"learning_rate": 4.919930172222054e-07, |
|
"logits/chosen": -0.7336487174034119, |
|
"logits/rejected": -0.7846344709396362, |
|
"logps/chosen": -344.8207702636719, |
|
"logps/rejected": -387.56201171875, |
|
"loss": 0.5369, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8977760076522827, |
|
"rewards/margins": 0.49707236886024475, |
|
"rewards/rejected": -1.3948484659194946, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1751439539347409, |
|
"grad_norm": 8.810480029842584, |
|
"learning_rate": 4.914588616675445e-07, |
|
"logits/chosen": -0.8329795598983765, |
|
"logits/rejected": -0.8481542468070984, |
|
"logps/chosen": -279.27081298828125, |
|
"logps/rejected": -337.36883544921875, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.61821049451828, |
|
"rewards/margins": 0.4857397675514221, |
|
"rewards/rejected": -1.1039502620697021, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17754318618042225, |
|
"grad_norm": 9.762970453211091, |
|
"learning_rate": 4.909077687330404e-07, |
|
"logits/chosen": -0.6972378492355347, |
|
"logits/rejected": -0.7455834150314331, |
|
"logps/chosen": -354.2886657714844, |
|
"logps/rejected": -357.53558349609375, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7641565799713135, |
|
"rewards/margins": 0.3690626621246338, |
|
"rewards/rejected": -1.1332192420959473, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17994241842610365, |
|
"grad_norm": 9.234397345125467, |
|
"learning_rate": 4.903397770756729e-07, |
|
"logits/chosen": -0.7595505714416504, |
|
"logits/rejected": -0.7833656668663025, |
|
"logps/chosen": -351.7059020996094, |
|
"logps/rejected": -407.5093688964844, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8937844038009644, |
|
"rewards/margins": 0.57142174243927, |
|
"rewards/rejected": -1.4652063846588135, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18234165067178504, |
|
"grad_norm": 7.71688649402669, |
|
"learning_rate": 4.897549265378004e-07, |
|
"logits/chosen": -0.7180362939834595, |
|
"logits/rejected": -0.7230840921401978, |
|
"logps/chosen": -430.9496154785156, |
|
"logps/rejected": -468.16937255859375, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.040961503982544, |
|
"rewards/margins": 0.4251536428928375, |
|
"rewards/rejected": -1.4661149978637695, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1847408829174664, |
|
"grad_norm": 8.153325048910528, |
|
"learning_rate": 4.891532581443643e-07, |
|
"logits/chosen": -0.8323251008987427, |
|
"logits/rejected": -0.8525883555412292, |
|
"logps/chosen": -381.8115234375, |
|
"logps/rejected": -471.56719970703125, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.7869538068771362, |
|
"rewards/margins": 0.9414682388305664, |
|
"rewards/rejected": -1.7284221649169922, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1871401151631478, |
|
"grad_norm": 8.422916796673588, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": -0.7381910085678101, |
|
"logits/rejected": -0.7251767516136169, |
|
"logps/chosen": -332.1246032714844, |
|
"logps/rejected": -410.90057373046875, |
|
"loss": 0.5641, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8256322741508484, |
|
"rewards/margins": 0.6119931936264038, |
|
"rewards/rejected": -1.4376256465911865, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18953934740882916, |
|
"grad_norm": 9.466411263035182, |
|
"learning_rate": 4.878996377861367e-07, |
|
"logits/chosen": -0.86748206615448, |
|
"logits/rejected": -0.8805437088012695, |
|
"logps/chosen": -308.0349426269531, |
|
"logps/rejected": -359.1808166503906, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.915812611579895, |
|
"rewards/margins": 0.4137847423553467, |
|
"rewards/rejected": -1.3295972347259521, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 9.035814907343086, |
|
"learning_rate": 4.872477737578327e-07, |
|
"logits/chosen": -0.830212414264679, |
|
"logits/rejected": -0.7413343787193298, |
|
"logps/chosen": -373.04217529296875, |
|
"logps/rejected": -468.3797302246094, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.003941535949707, |
|
"rewards/margins": 0.9732707738876343, |
|
"rewards/rejected": -1.9772125482559204, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19433781190019195, |
|
"grad_norm": 11.703896273776289, |
|
"learning_rate": 4.865792677407718e-07, |
|
"logits/chosen": -0.8188837766647339, |
|
"logits/rejected": -0.8448683023452759, |
|
"logps/chosen": -368.94512939453125, |
|
"logps/rejected": -377.5952453613281, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1495884656906128, |
|
"rewards/margins": 0.362571656703949, |
|
"rewards/rejected": -1.512160062789917, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1967370441458733, |
|
"grad_norm": 10.81943352397323, |
|
"learning_rate": 4.858941666279955e-07, |
|
"logits/chosen": -0.8488418459892273, |
|
"logits/rejected": -0.880780816078186, |
|
"logps/chosen": -353.0042419433594, |
|
"logps/rejected": -379.8878479003906, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.79103684425354, |
|
"rewards/margins": 0.4048077464103699, |
|
"rewards/rejected": -1.1958444118499756, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1991362763915547, |
|
"grad_norm": 8.925559856487725, |
|
"learning_rate": 4.851925184766247e-07, |
|
"logits/chosen": -0.8389931917190552, |
|
"logits/rejected": -0.8778663873672485, |
|
"logps/chosen": -348.4869079589844, |
|
"logps/rejected": -395.1742248535156, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9398209452629089, |
|
"rewards/margins": 0.6886889934539795, |
|
"rewards/rejected": -1.6285101175308228, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20153550863723607, |
|
"grad_norm": 11.109685433240054, |
|
"learning_rate": 4.844743725044897e-07, |
|
"logits/chosen": -0.8412739038467407, |
|
"logits/rejected": -0.9312038421630859, |
|
"logps/chosen": -339.4559631347656, |
|
"logps/rejected": -370.5246887207031, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9467164278030396, |
|
"rewards/margins": 0.5613173246383667, |
|
"rewards/rejected": -1.5080337524414062, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.20393474088291746, |
|
"grad_norm": 10.72753800655601, |
|
"learning_rate": 4.837397790866774e-07, |
|
"logits/chosen": -0.8491243124008179, |
|
"logits/rejected": -0.8605045080184937, |
|
"logps/chosen": -366.4127502441406, |
|
"logps/rejected": -440.4471130371094, |
|
"loss": 0.5521, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7613908052444458, |
|
"rewards/margins": 0.9519069790840149, |
|
"rewards/rejected": -1.7132980823516846, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20633397312859886, |
|
"grad_norm": 10.004722580523001, |
|
"learning_rate": 4.829887897519974e-07, |
|
"logits/chosen": -0.9373795390129089, |
|
"logits/rejected": -0.9096584320068359, |
|
"logps/chosen": -319.7113952636719, |
|
"logps/rejected": -403.3350524902344, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8845510482788086, |
|
"rewards/margins": 0.5768999457359314, |
|
"rewards/rejected": -1.4614509344100952, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.20873320537428022, |
|
"grad_norm": 9.07756678312938, |
|
"learning_rate": 4.82221457179368e-07, |
|
"logits/chosen": -0.869964599609375, |
|
"logits/rejected": -0.871699333190918, |
|
"logps/chosen": -354.28704833984375, |
|
"logps/rejected": -417.8169860839844, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8736898303031921, |
|
"rewards/margins": 0.754965603351593, |
|
"rewards/rejected": -1.6286554336547852, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 11.276381279159365, |
|
"learning_rate": 4.814378351941206e-07, |
|
"logits/chosen": -0.8650039434432983, |
|
"logits/rejected": -0.8803671002388, |
|
"logps/chosen": -343.48822021484375, |
|
"logps/rejected": -381.94207763671875, |
|
"loss": 0.5619, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9373790621757507, |
|
"rewards/margins": 0.47534435987472534, |
|
"rewards/rejected": -1.4127235412597656, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21353166986564298, |
|
"grad_norm": 10.076876257674526, |
|
"learning_rate": 4.806379787642241e-07, |
|
"logits/chosen": -0.8381707072257996, |
|
"logits/rejected": -0.8108996152877808, |
|
"logps/chosen": -331.9487609863281, |
|
"logps/rejected": -403.20867919921875, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8763322830200195, |
|
"rewards/margins": 0.624114990234375, |
|
"rewards/rejected": -1.500447392463684, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21593090211132437, |
|
"grad_norm": 10.034596459189128, |
|
"learning_rate": 4.798219439964293e-07, |
|
"logits/chosen": -0.8501941561698914, |
|
"logits/rejected": -0.8989803194999695, |
|
"logps/chosen": -343.5841979980469, |
|
"logps/rejected": -392.810791015625, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0320571660995483, |
|
"rewards/margins": 0.36501601338386536, |
|
"rewards/rejected": -1.3970732688903809, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21833013435700577, |
|
"grad_norm": 10.27150503174172, |
|
"learning_rate": 4.78989788132333e-07, |
|
"logits/chosen": -0.902818500995636, |
|
"logits/rejected": -0.8824566006660461, |
|
"logps/chosen": -300.7974853515625, |
|
"logps/rejected": -388.55126953125, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8453769683837891, |
|
"rewards/margins": 0.8020466566085815, |
|
"rewards/rejected": -1.647423505783081, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22072936660268713, |
|
"grad_norm": 8.577531584564799, |
|
"learning_rate": 4.781415695443631e-07, |
|
"logits/chosen": -0.7745347619056702, |
|
"logits/rejected": -0.7863970398902893, |
|
"logps/chosen": -420.1585388183594, |
|
"logps/rejected": -460.1143493652344, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2678101062774658, |
|
"rewards/margins": 0.29927223920822144, |
|
"rewards/rejected": -1.5670822858810425, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22312859884836853, |
|
"grad_norm": 7.712609609832512, |
|
"learning_rate": 4.772773477316836e-07, |
|
"logits/chosen": -0.7534626722335815, |
|
"logits/rejected": -0.7615676522254944, |
|
"logps/chosen": -395.2740478515625, |
|
"logps/rejected": -448.7425231933594, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.115117073059082, |
|
"rewards/margins": 0.5547926425933838, |
|
"rewards/rejected": -1.6699097156524658, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2255278310940499, |
|
"grad_norm": 14.001350284298912, |
|
"learning_rate": 4.7639718331602117e-07, |
|
"logits/chosen": -0.7357865571975708, |
|
"logits/rejected": -0.7330624461174011, |
|
"logps/chosen": -353.99200439453125, |
|
"logps/rejected": -431.35565185546875, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8342105150222778, |
|
"rewards/margins": 0.878664493560791, |
|
"rewards/rejected": -1.7128750085830688, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22792706333973128, |
|
"grad_norm": 10.221546212484748, |
|
"learning_rate": 4.7550113803741275e-07, |
|
"logits/chosen": -0.7402353882789612, |
|
"logits/rejected": -0.8165884017944336, |
|
"logps/chosen": -379.7347717285156, |
|
"logps/rejected": -362.58721923828125, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0031925439834595, |
|
"rewards/margins": 0.4585431218147278, |
|
"rewards/rejected": -1.461735486984253, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 10.091860887109807, |
|
"learning_rate": 4.7458927474987454e-07, |
|
"logits/chosen": -0.699491560459137, |
|
"logits/rejected": -0.7056708931922913, |
|
"logps/chosen": -411.66754150390625, |
|
"logps/rejected": -392.9963073730469, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8979185223579407, |
|
"rewards/margins": 0.4219675064086914, |
|
"rewards/rejected": -1.3198859691619873, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23272552783109404, |
|
"grad_norm": 10.700825723854395, |
|
"learning_rate": 4.7366165741699347e-07, |
|
"logits/chosen": -0.7538058161735535, |
|
"logits/rejected": -0.7752776145935059, |
|
"logps/chosen": -426.29840087890625, |
|
"logps/rejected": -456.43865966796875, |
|
"loss": 0.5367, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0425876379013062, |
|
"rewards/margins": 0.5545080900192261, |
|
"rewards/rejected": -1.5970958471298218, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23512476007677544, |
|
"grad_norm": 9.318743253411762, |
|
"learning_rate": 4.727183511074401e-07, |
|
"logits/chosen": -0.8688480257987976, |
|
"logits/rejected": -0.8663204908370972, |
|
"logps/chosen": -376.7490234375, |
|
"logps/rejected": -404.92816162109375, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.009129285812378, |
|
"rewards/margins": 0.382347047328949, |
|
"rewards/rejected": -1.3914763927459717, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2375239923224568, |
|
"grad_norm": 10.050271627816938, |
|
"learning_rate": 4.717594219904043e-07, |
|
"logits/chosen": -0.7565699815750122, |
|
"logits/rejected": -0.805103600025177, |
|
"logps/chosen": -377.40472412109375, |
|
"logps/rejected": -379.1291809082031, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0759207010269165, |
|
"rewards/margins": 0.45810168981552124, |
|
"rewards/rejected": -1.534022331237793, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2399232245681382, |
|
"grad_norm": 9.560726789203981, |
|
"learning_rate": 4.7078493733095393e-07, |
|
"logits/chosen": -0.9027126431465149, |
|
"logits/rejected": -0.9078402519226074, |
|
"logps/chosen": -370.6812744140625, |
|
"logps/rejected": -446.0091247558594, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.204393982887268, |
|
"rewards/margins": 0.6276249289512634, |
|
"rewards/rejected": -1.8320188522338867, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2423224568138196, |
|
"grad_norm": 9.375367945145504, |
|
"learning_rate": 4.6979496548531614e-07, |
|
"logits/chosen": -0.8843992352485657, |
|
"logits/rejected": -0.8497310876846313, |
|
"logps/chosen": -399.2445373535156, |
|
"logps/rejected": -512.7918701171875, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4072428941726685, |
|
"rewards/margins": 0.6233394742012024, |
|
"rewards/rejected": -2.0305821895599365, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24472168905950095, |
|
"grad_norm": 9.752181299642505, |
|
"learning_rate": 4.6878957589608293e-07, |
|
"logits/chosen": -0.8666139841079712, |
|
"logits/rejected": -0.8512627482414246, |
|
"logps/chosen": -390.6292724609375, |
|
"logps/rejected": -506.28228759765625, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2309002876281738, |
|
"rewards/margins": 0.7173486351966858, |
|
"rewards/rejected": -1.9482488632202148, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24712092130518235, |
|
"grad_norm": 9.424652839992445, |
|
"learning_rate": 4.6776883908733956e-07, |
|
"logits/chosen": -0.9692492485046387, |
|
"logits/rejected": -1.0165684223175049, |
|
"logps/chosen": -394.48858642578125, |
|
"logps/rejected": -401.189697265625, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0245649814605713, |
|
"rewards/margins": 0.7163118720054626, |
|
"rewards/rejected": -1.7408767938613892, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 12.277316191566172, |
|
"learning_rate": 4.667328266597178e-07, |
|
"logits/chosen": -0.8959840536117554, |
|
"logits/rejected": -0.9235955476760864, |
|
"logps/chosen": -361.2985534667969, |
|
"logps/rejected": -423.58233642578125, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9520134925842285, |
|
"rewards/margins": 0.6992291808128357, |
|
"rewards/rejected": -1.6512426137924194, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2519193857965451, |
|
"grad_norm": 8.94451963423609, |
|
"learning_rate": 4.6568161128537354e-07, |
|
"logits/chosen": -0.8420774340629578, |
|
"logits/rejected": -0.9481871724128723, |
|
"logps/chosen": -380.89874267578125, |
|
"logps/rejected": -393.58160400390625, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2700251340866089, |
|
"rewards/margins": 0.634412407875061, |
|
"rewards/rejected": -1.9044376611709595, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2543186180422265, |
|
"grad_norm": 14.034213989285222, |
|
"learning_rate": 4.6461526670288877e-07, |
|
"logits/chosen": -0.8429055213928223, |
|
"logits/rejected": -0.8352988958358765, |
|
"logps/chosen": -404.8660888671875, |
|
"logps/rejected": -438.3995056152344, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1988664865493774, |
|
"rewards/margins": 0.6478247046470642, |
|
"rewards/rejected": -1.8466911315917969, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2567178502879079, |
|
"grad_norm": 13.373585055843934, |
|
"learning_rate": 4.635338677120994e-07, |
|
"logits/chosen": -0.9964144825935364, |
|
"logits/rejected": -0.9940506815910339, |
|
"logps/chosen": -377.0820617675781, |
|
"logps/rejected": -486.6973571777344, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1816045045852661, |
|
"rewards/margins": 0.8956181406974792, |
|
"rewards/rejected": -2.0772225856781006, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2591170825335892, |
|
"grad_norm": 11.284998984427544, |
|
"learning_rate": 4.6243749016884835e-07, |
|
"logits/chosen": -0.7764107584953308, |
|
"logits/rejected": -0.8370550870895386, |
|
"logps/chosen": -418.0027770996094, |
|
"logps/rejected": -593.8638916015625, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.493062138557434, |
|
"rewards/margins": 1.221855640411377, |
|
"rewards/rejected": -2.7149176597595215, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2615163147792706, |
|
"grad_norm": 16.100264142741796, |
|
"learning_rate": 4.613262109796645e-07, |
|
"logits/chosen": -0.9073816537857056, |
|
"logits/rejected": -0.8367312550544739, |
|
"logps/chosen": -432.54095458984375, |
|
"logps/rejected": -573.1912841796875, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6614210605621338, |
|
"rewards/margins": 0.9968196749687195, |
|
"rewards/rejected": -2.658240556716919, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.263915547024952, |
|
"grad_norm": 12.598181058336907, |
|
"learning_rate": 4.602001080963678e-07, |
|
"logits/chosen": -0.8620105981826782, |
|
"logits/rejected": -0.8818934559822083, |
|
"logps/chosen": -452.8822326660156, |
|
"logps/rejected": -499.30694580078125, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6771061420440674, |
|
"rewards/margins": 0.7576761841773987, |
|
"rewards/rejected": -2.4347822666168213, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2663147792706334, |
|
"grad_norm": 11.020616319405283, |
|
"learning_rate": 4.590592605106017e-07, |
|
"logits/chosen": -0.925918698310852, |
|
"logits/rejected": -0.9431027173995972, |
|
"logps/chosen": -427.30279541015625, |
|
"logps/rejected": -476.481201171875, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2021554708480835, |
|
"rewards/margins": 0.7562609910964966, |
|
"rewards/rejected": -1.9584165811538696, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 11.43283460001136, |
|
"learning_rate": 4.5790374824829165e-07, |
|
"logits/chosen": -0.8499002456665039, |
|
"logits/rejected": -0.8679935336112976, |
|
"logps/chosen": -310.17327880859375, |
|
"logps/rejected": -387.49249267578125, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2178689241409302, |
|
"rewards/margins": 0.6209184527397156, |
|
"rewards/rejected": -1.8387874364852905, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27111324376199614, |
|
"grad_norm": 10.510361605616744, |
|
"learning_rate": 4.5673365236403216e-07, |
|
"logits/chosen": -0.882469654083252, |
|
"logits/rejected": -0.9395925402641296, |
|
"logps/chosen": -300.57720947265625, |
|
"logps/rejected": -434.6649475097656, |
|
"loss": 0.5097, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0626921653747559, |
|
"rewards/margins": 1.0293641090393066, |
|
"rewards/rejected": -2.0920560359954834, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27351247600767753, |
|
"grad_norm": 11.418966211240742, |
|
"learning_rate": 4.5554905493540075e-07, |
|
"logits/chosen": -0.9051049947738647, |
|
"logits/rejected": -0.880601704120636, |
|
"logps/chosen": -340.77911376953125, |
|
"logps/rejected": -454.90252685546875, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.225037932395935, |
|
"rewards/margins": 1.0688270330429077, |
|
"rewards/rejected": -2.2938647270202637, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2759117082533589, |
|
"grad_norm": 9.72460464054391, |
|
"learning_rate": 4.5435003905720074e-07, |
|
"logits/chosen": -0.8102267980575562, |
|
"logits/rejected": -0.8643985986709595, |
|
"logps/chosen": -390.0484619140625, |
|
"logps/rejected": -450.08868408203125, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.0651195049285889, |
|
"rewards/margins": 0.8835436701774597, |
|
"rewards/rejected": -1.9486631155014038, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2783109404990403, |
|
"grad_norm": 13.360087004422779, |
|
"learning_rate": 4.531366888356324e-07, |
|
"logits/chosen": -0.8684479594230652, |
|
"logits/rejected": -0.8134763836860657, |
|
"logps/chosen": -311.7840881347656, |
|
"logps/rejected": -458.7945861816406, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3148356676101685, |
|
"rewards/margins": 1.0154473781585693, |
|
"rewards/rejected": -2.3302829265594482, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2807101727447217, |
|
"grad_norm": 17.680025948725728, |
|
"learning_rate": 4.519090893823931e-07, |
|
"logits/chosen": -0.830313503742218, |
|
"logits/rejected": -0.8483401536941528, |
|
"logps/chosen": -398.6609802246094, |
|
"logps/rejected": -483.01348876953125, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.591963291168213, |
|
"rewards/margins": 0.8999356031417847, |
|
"rewards/rejected": -2.491899013519287, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28310940499040305, |
|
"grad_norm": 10.953334824928836, |
|
"learning_rate": 4.5066732680870734e-07, |
|
"logits/chosen": -0.8117620348930359, |
|
"logits/rejected": -0.855734646320343, |
|
"logps/chosen": -401.9914245605469, |
|
"logps/rejected": -469.42218017578125, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4848546981811523, |
|
"rewards/margins": 1.069037914276123, |
|
"rewards/rejected": -2.5538926124572754, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28550863723608444, |
|
"grad_norm": 12.957936678071963, |
|
"learning_rate": 4.494114882192862e-07, |
|
"logits/chosen": -0.8582413792610168, |
|
"logits/rejected": -0.8433802723884583, |
|
"logps/chosen": -400.0009460449219, |
|
"logps/rejected": -485.3779296875, |
|
"loss": 0.4816, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.388290524482727, |
|
"rewards/margins": 1.1432268619537354, |
|
"rewards/rejected": -2.531517267227173, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 12.79986874331055, |
|
"learning_rate": 4.4814166170621735e-07, |
|
"logits/chosen": -0.8589996099472046, |
|
"logits/rejected": -0.8760782480239868, |
|
"logps/chosen": -367.97552490234375, |
|
"logps/rejected": -463.1073303222656, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2390401363372803, |
|
"rewards/margins": 1.1767470836639404, |
|
"rewards/rejected": -2.4157872200012207, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2903071017274472, |
|
"grad_norm": 12.234631147028441, |
|
"learning_rate": 4.468579363427858e-07, |
|
"logits/chosen": -0.8185877799987793, |
|
"logits/rejected": -0.8393834829330444, |
|
"logps/chosen": -394.2176513671875, |
|
"logps/rejected": -454.900146484375, |
|
"loss": 0.5058, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4075071811676025, |
|
"rewards/margins": 0.8511323928833008, |
|
"rewards/rejected": -2.2586395740509033, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2927063339731286, |
|
"grad_norm": 17.553407135322153, |
|
"learning_rate": 4.4556040217722555e-07, |
|
"logits/chosen": -0.8887416124343872, |
|
"logits/rejected": -0.860831618309021, |
|
"logps/chosen": -352.8416748046875, |
|
"logps/rejected": -510.03546142578125, |
|
"loss": 0.4867, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.1481072902679443, |
|
"rewards/margins": 1.2108467817306519, |
|
"rewards/rejected": -2.3589539527893066, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29510556621880996, |
|
"grad_norm": 11.868156097873015, |
|
"learning_rate": 4.442491502264033e-07, |
|
"logits/chosen": -0.8076246976852417, |
|
"logits/rejected": -0.8352873921394348, |
|
"logps/chosen": -364.78826904296875, |
|
"logps/rejected": -407.4501953125, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4375666379928589, |
|
"rewards/margins": 0.5911107659339905, |
|
"rewards/rejected": -2.028677463531494, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29750479846449135, |
|
"grad_norm": 11.089930522236687, |
|
"learning_rate": 4.429242724694338e-07, |
|
"logits/chosen": -0.8699033856391907, |
|
"logits/rejected": -0.8417544364929199, |
|
"logps/chosen": -404.20159912109375, |
|
"logps/rejected": -516.6590576171875, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.583553433418274, |
|
"rewards/margins": 0.9518542289733887, |
|
"rewards/rejected": -2.535407543182373, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.29990403071017274, |
|
"grad_norm": 11.987389579927841, |
|
"learning_rate": 4.4158586184122817e-07, |
|
"logits/chosen": -0.8027983903884888, |
|
"logits/rejected": -0.8380182981491089, |
|
"logps/chosen": -424.4278259277344, |
|
"logps/rejected": -504.82476806640625, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.338322401046753, |
|
"rewards/margins": 1.1540087461471558, |
|
"rewards/rejected": -2.4923312664031982, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.30230326295585414, |
|
"grad_norm": 11.726927303441652, |
|
"learning_rate": 4.4023401222597443e-07, |
|
"logits/chosen": -0.7710140943527222, |
|
"logits/rejected": -0.8672100901603699, |
|
"logps/chosen": -440.61566162109375, |
|
"logps/rejected": -509.686767578125, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5568013191223145, |
|
"rewards/margins": 0.9064668416976929, |
|
"rewards/rejected": -2.463268518447876, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.30470249520153553, |
|
"grad_norm": 13.87871055626238, |
|
"learning_rate": 4.3886881845055235e-07, |
|
"logits/chosen": -0.8198641538619995, |
|
"logits/rejected": -0.8790807723999023, |
|
"logps/chosen": -392.24658203125, |
|
"logps/rejected": -504.7674255371094, |
|
"loss": 0.4534, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4889254570007324, |
|
"rewards/margins": 1.196481466293335, |
|
"rewards/rejected": -2.6854069232940674, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 10.998393295453823, |
|
"learning_rate": 4.374903762778814e-07, |
|
"logits/chosen": -0.8656896352767944, |
|
"logits/rejected": -0.8914599418640137, |
|
"logps/chosen": -444.3971252441406, |
|
"logps/rejected": -520.9356689453125, |
|
"loss": 0.4696, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8613601922988892, |
|
"rewards/margins": 1.0016160011291504, |
|
"rewards/rejected": -2.862975835800171, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.30950095969289826, |
|
"grad_norm": 12.50531025670152, |
|
"learning_rate": 4.3609878240020356e-07, |
|
"logits/chosen": -0.8085900545120239, |
|
"logits/rejected": -0.8679038882255554, |
|
"logps/chosen": -484.76251220703125, |
|
"logps/rejected": -533.459716796875, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8432880640029907, |
|
"rewards/margins": 1.1199102401733398, |
|
"rewards/rejected": -2.963197946548462, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31190019193857965, |
|
"grad_norm": 11.511964627048417, |
|
"learning_rate": 4.346941344323005e-07, |
|
"logits/chosen": -0.8386822938919067, |
|
"logits/rejected": -0.9056866765022278, |
|
"logps/chosen": -432.3689880371094, |
|
"logps/rejected": -450.2909240722656, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9001528024673462, |
|
"rewards/margins": 0.6564325094223022, |
|
"rewards/rejected": -2.5565853118896484, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31429942418426104, |
|
"grad_norm": 11.097528659174904, |
|
"learning_rate": 4.332765309046467e-07, |
|
"logits/chosen": -0.7923992276191711, |
|
"logits/rejected": -0.7886919379234314, |
|
"logps/chosen": -419.7078552246094, |
|
"logps/rejected": -483.94049072265625, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4908955097198486, |
|
"rewards/margins": 1.0034749507904053, |
|
"rewards/rejected": -2.494370460510254, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31669865642994244, |
|
"grad_norm": 15.474416362716356, |
|
"learning_rate": 4.3184607125649754e-07, |
|
"logits/chosen": -0.8138014078140259, |
|
"logits/rejected": -0.8160893321037292, |
|
"logps/chosen": -396.61572265625, |
|
"logps/rejected": -518.4467163085938, |
|
"loss": 0.5069, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2408325672149658, |
|
"rewards/margins": 1.0694630146026611, |
|
"rewards/rejected": -2.310295581817627, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3190978886756238, |
|
"grad_norm": 10.27191089173125, |
|
"learning_rate": 4.304028558289141e-07, |
|
"logits/chosen": -0.8450434803962708, |
|
"logits/rejected": -0.8716680407524109, |
|
"logps/chosen": -392.87164306640625, |
|
"logps/rejected": -477.4569396972656, |
|
"loss": 0.4715, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.1086870431900024, |
|
"rewards/margins": 1.0651975870132446, |
|
"rewards/rejected": -2.173884630203247, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32149712092130517, |
|
"grad_norm": 9.036616057089963, |
|
"learning_rate": 4.28946985857725e-07, |
|
"logits/chosen": -0.7608897686004639, |
|
"logits/rejected": -0.7740424871444702, |
|
"logps/chosen": -430.8748474121094, |
|
"logps/rejected": -552.3256225585938, |
|
"loss": 0.463, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5485337972640991, |
|
"rewards/margins": 1.306312084197998, |
|
"rewards/rejected": -2.8548457622528076, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32389635316698656, |
|
"grad_norm": 11.634362526668829, |
|
"learning_rate": 4.2747856346642445e-07, |
|
"logits/chosen": -0.8262165188789368, |
|
"logits/rejected": -0.8555091023445129, |
|
"logps/chosen": -375.15472412109375, |
|
"logps/rejected": -471.82293701171875, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4888992309570312, |
|
"rewards/margins": 1.016867995262146, |
|
"rewards/rejected": -2.505767345428467, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 13.398876290373195, |
|
"learning_rate": 4.2599769165900933e-07, |
|
"logits/chosen": -0.7887164950370789, |
|
"logits/rejected": -0.8400223851203918, |
|
"logps/chosen": -464.3109436035156, |
|
"logps/rejected": -523.8682861328125, |
|
"loss": 0.5048, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0703892707824707, |
|
"rewards/margins": 0.833997905254364, |
|
"rewards/rejected": -2.9043872356414795, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32869481765834935, |
|
"grad_norm": 10.664824987437187, |
|
"learning_rate": 4.245044743127535e-07, |
|
"logits/chosen": -0.9320866465568542, |
|
"logits/rejected": -0.9028736352920532, |
|
"logps/chosen": -407.45880126953125, |
|
"logps/rejected": -506.1048889160156, |
|
"loss": 0.4833, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5945441722869873, |
|
"rewards/margins": 0.8470728993415833, |
|
"rewards/rejected": -2.441617250442505, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3310940499040307, |
|
"grad_norm": 14.46708847771426, |
|
"learning_rate": 4.229990161709214e-07, |
|
"logits/chosen": -0.7717675566673279, |
|
"logits/rejected": -0.7377297282218933, |
|
"logps/chosen": -367.9638977050781, |
|
"logps/rejected": -525.7390747070312, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.408815622329712, |
|
"rewards/margins": 1.3415130376815796, |
|
"rewards/rejected": -2.750328540802002, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3334932821497121, |
|
"grad_norm": 8.175303654043583, |
|
"learning_rate": 4.214814228354204e-07, |
|
"logits/chosen": -0.7827272415161133, |
|
"logits/rejected": -0.7984440326690674, |
|
"logps/chosen": -451.130126953125, |
|
"logps/rejected": -563.998046875, |
|
"loss": 0.4767, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7949268817901611, |
|
"rewards/margins": 1.3159399032592773, |
|
"rewards/rejected": -3.1108667850494385, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33589251439539347, |
|
"grad_norm": 12.456155197274827, |
|
"learning_rate": 4.1995180075939375e-07, |
|
"logits/chosen": -0.8594837188720703, |
|
"logits/rejected": -0.8565725088119507, |
|
"logps/chosen": -457.4839782714844, |
|
"logps/rejected": -527.7620239257812, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6926393508911133, |
|
"rewards/margins": 0.9544218182563782, |
|
"rewards/rejected": -2.647061347961426, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33829174664107486, |
|
"grad_norm": 10.492615269241377, |
|
"learning_rate": 4.1841025723975297e-07, |
|
"logits/chosen": -0.8039811849594116, |
|
"logits/rejected": -0.809655487537384, |
|
"logps/chosen": -395.67999267578125, |
|
"logps/rejected": -496.8030700683594, |
|
"loss": 0.4625, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0631120204925537, |
|
"rewards/margins": 1.2070002555847168, |
|
"rewards/rejected": -2.2701122760772705, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.34069097888675626, |
|
"grad_norm": 11.628088236483801, |
|
"learning_rate": 4.168569004096516e-07, |
|
"logits/chosen": -0.7779537439346313, |
|
"logits/rejected": -0.732982337474823, |
|
"logps/chosen": -365.4278259277344, |
|
"logps/rejected": -518.9385986328125, |
|
"loss": 0.4622, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3053802251815796, |
|
"rewards/margins": 1.3282335996627808, |
|
"rewards/rejected": -2.6336138248443604, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3430902111324376, |
|
"grad_norm": 9.810034255281902, |
|
"learning_rate": 4.152918392308997e-07, |
|
"logits/chosen": -0.9322064518928528, |
|
"logits/rejected": -0.9035415649414062, |
|
"logps/chosen": -429.0191345214844, |
|
"logps/rejected": -481.86846923828125, |
|
"loss": 0.4737, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8905305862426758, |
|
"rewards/margins": 0.6541526317596436, |
|
"rewards/rejected": -2.5446829795837402, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 16.302876025790795, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": -0.8372869491577148, |
|
"logits/rejected": -0.7931715250015259, |
|
"logps/chosen": -425.23651123046875, |
|
"logps/rejected": -571.8636474609375, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9143438339233398, |
|
"rewards/margins": 1.09660804271698, |
|
"rewards/rejected": -3.0109522342681885, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3478886756238004, |
|
"grad_norm": 14.266445268878716, |
|
"learning_rate": 4.121270437720526e-07, |
|
"logits/chosen": -0.7531959414482117, |
|
"logits/rejected": -0.7015701532363892, |
|
"logps/chosen": -415.9129943847656, |
|
"logps/rejected": -544.4853515625, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.1056931018829346, |
|
"rewards/margins": 0.7997118234634399, |
|
"rewards/rejected": -2.905405044555664, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3502879078694818, |
|
"grad_norm": 8.666123707311465, |
|
"learning_rate": 4.105275314897852e-07, |
|
"logits/chosen": -0.8094059228897095, |
|
"logits/rejected": -0.8042441606521606, |
|
"logps/chosen": -387.4685363769531, |
|
"logps/rejected": -575.4444580078125, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7935253381729126, |
|
"rewards/margins": 1.4926183223724365, |
|
"rewards/rejected": -3.2861435413360596, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35268714011516317, |
|
"grad_norm": 10.639348735955451, |
|
"learning_rate": 4.089167588389508e-07, |
|
"logits/chosen": -0.7043929100036621, |
|
"logits/rejected": -0.7534819841384888, |
|
"logps/chosen": -515.1029663085938, |
|
"logps/rejected": -574.6419067382812, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7564353942871094, |
|
"rewards/margins": 1.080705165863037, |
|
"rewards/rejected": -2.8371405601501465, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3550863723608445, |
|
"grad_norm": 13.106435290020489, |
|
"learning_rate": 4.072948388088515e-07, |
|
"logits/chosen": -0.6628540754318237, |
|
"logits/rejected": -0.6470414400100708, |
|
"logps/chosen": -432.12005615234375, |
|
"logps/rejected": -549.7205200195312, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.650691270828247, |
|
"rewards/margins": 1.125057578086853, |
|
"rewards/rejected": -2.7757484912872314, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3574856046065259, |
|
"grad_norm": 13.06259828621992, |
|
"learning_rate": 4.056618851707334e-07, |
|
"logits/chosen": -0.6907030344009399, |
|
"logits/rejected": -0.7093620300292969, |
|
"logps/chosen": -397.7687683105469, |
|
"logps/rejected": -526.349853515625, |
|
"loss": 0.4663, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3325411081314087, |
|
"rewards/margins": 1.221077561378479, |
|
"rewards/rejected": -2.553618907928467, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3598848368522073, |
|
"grad_norm": 12.917502818953556, |
|
"learning_rate": 4.0401801246980675e-07, |
|
"logits/chosen": -0.8259037137031555, |
|
"logits/rejected": -0.8357691764831543, |
|
"logps/chosen": -384.6356506347656, |
|
"logps/rejected": -453.1322326660156, |
|
"loss": 0.4882, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7201900482177734, |
|
"rewards/margins": 0.8327716588973999, |
|
"rewards/rejected": -2.552961587905884, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3622840690978887, |
|
"grad_norm": 12.433094406122816, |
|
"learning_rate": 4.0236333601721043e-07, |
|
"logits/chosen": -0.7449339628219604, |
|
"logits/rejected": -0.7388188242912292, |
|
"logps/chosen": -460.6793518066406, |
|
"logps/rejected": -544.5335693359375, |
|
"loss": 0.5044, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6877784729003906, |
|
"rewards/margins": 0.7297952175140381, |
|
"rewards/rejected": -2.417573928833008, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 13.800192452791908, |
|
"learning_rate": 4.0069797188192364e-07, |
|
"logits/chosen": -0.7582114934921265, |
|
"logits/rejected": -0.742210328578949, |
|
"logps/chosen": -439.11114501953125, |
|
"logps/rejected": -521.8330688476562, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5508705377578735, |
|
"rewards/margins": 1.0782378911972046, |
|
"rewards/rejected": -2.6291086673736572, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3670825335892514, |
|
"grad_norm": 13.571364318064191, |
|
"learning_rate": 3.9902203688262417e-07, |
|
"logits/chosen": -0.721932590007782, |
|
"logits/rejected": -0.7364694476127625, |
|
"logps/chosen": -405.0948791503906, |
|
"logps/rejected": -501.7353515625, |
|
"loss": 0.4661, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3923845291137695, |
|
"rewards/margins": 1.094702959060669, |
|
"rewards/rejected": -2.4870872497558594, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3694817658349328, |
|
"grad_norm": 11.011970777974243, |
|
"learning_rate": 3.9733564857949365e-07, |
|
"logits/chosen": -0.6986292600631714, |
|
"logits/rejected": -0.6912825703620911, |
|
"logps/chosen": -500.9569396972656, |
|
"logps/rejected": -553.8840942382812, |
|
"loss": 0.4869, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.857325553894043, |
|
"rewards/margins": 0.9566340446472168, |
|
"rewards/rejected": -2.8139595985412598, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3718809980806142, |
|
"grad_norm": 10.905693632178256, |
|
"learning_rate": 3.9563892526597177e-07, |
|
"logits/chosen": -0.7152280807495117, |
|
"logits/rejected": -0.6881515383720398, |
|
"logps/chosen": -388.0267333984375, |
|
"logps/rejected": -521.2376708984375, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6515610218048096, |
|
"rewards/margins": 0.8464974164962769, |
|
"rewards/rejected": -2.498058319091797, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3742802303262956, |
|
"grad_norm": 10.08743827300553, |
|
"learning_rate": 3.9393198596045795e-07, |
|
"logits/chosen": -0.7806371450424194, |
|
"logits/rejected": -0.76411372423172, |
|
"logps/chosen": -399.4765930175781, |
|
"logps/rejected": -516.7181396484375, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7115662097930908, |
|
"rewards/margins": 0.941506028175354, |
|
"rewards/rejected": -2.6530721187591553, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.376679462571977, |
|
"grad_norm": 8.868869870998214, |
|
"learning_rate": 3.922149503979628e-07, |
|
"logits/chosen": -0.6893107295036316, |
|
"logits/rejected": -0.7296844124794006, |
|
"logps/chosen": -438.3675231933594, |
|
"logps/rejected": -609.4227905273438, |
|
"loss": 0.4809, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.628042221069336, |
|
"rewards/margins": 1.6386429071426392, |
|
"rewards/rejected": -3.2666850090026855, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3790786948176583, |
|
"grad_norm": 13.078715942181777, |
|
"learning_rate": 3.904879390217095e-07, |
|
"logits/chosen": -0.799870491027832, |
|
"logits/rejected": -0.8303624987602234, |
|
"logps/chosen": -410.641845703125, |
|
"logps/rejected": -479.2076721191406, |
|
"loss": 0.458, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6100763082504272, |
|
"rewards/margins": 0.8601690530776978, |
|
"rewards/rejected": -2.470245361328125, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3814779270633397, |
|
"grad_norm": 12.71197235549074, |
|
"learning_rate": 3.8875107297468463e-07, |
|
"logits/chosen": -0.7600913643836975, |
|
"logits/rejected": -0.7607609033584595, |
|
"logps/chosen": -388.62042236328125, |
|
"logps/rejected": -583.7432250976562, |
|
"loss": 0.4781, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4480386972427368, |
|
"rewards/margins": 1.5103285312652588, |
|
"rewards/rejected": -2.958367109298706, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 13.466303880551026, |
|
"learning_rate": 3.87004474091141e-07, |
|
"logits/chosen": -0.6408634781837463, |
|
"logits/rejected": -0.6452223062515259, |
|
"logps/chosen": -373.9280700683594, |
|
"logps/rejected": -497.0530700683594, |
|
"loss": 0.4874, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4634227752685547, |
|
"rewards/margins": 1.0529407262802124, |
|
"rewards/rejected": -2.5163636207580566, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3862763915547025, |
|
"grad_norm": 12.405090587714351, |
|
"learning_rate": 3.8524826488805114e-07, |
|
"logits/chosen": -0.812592625617981, |
|
"logits/rejected": -0.774621844291687, |
|
"logps/chosen": -457.66961669921875, |
|
"logps/rejected": -521.51904296875, |
|
"loss": 0.5016, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.722423791885376, |
|
"rewards/margins": 1.091522455215454, |
|
"rewards/rejected": -2.81394624710083, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3886756238003839, |
|
"grad_norm": 10.910380188178692, |
|
"learning_rate": 3.834825685565133e-07, |
|
"logits/chosen": -0.8149593472480774, |
|
"logits/rejected": -0.8616162538528442, |
|
"logps/chosen": -388.2657165527344, |
|
"logps/rejected": -452.9178161621094, |
|
"loss": 0.4461, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5048949718475342, |
|
"rewards/margins": 1.0155197381973267, |
|
"rewards/rejected": -2.5204145908355713, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39107485604606523, |
|
"grad_norm": 12.461150034624684, |
|
"learning_rate": 3.8170750895311007e-07, |
|
"logits/chosen": -0.7702925205230713, |
|
"logits/rejected": -0.7936859726905823, |
|
"logps/chosen": -419.08270263671875, |
|
"logps/rejected": -508.16571044921875, |
|
"loss": 0.4739, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3406755924224854, |
|
"rewards/margins": 1.148717999458313, |
|
"rewards/rejected": -2.489393711090088, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3934740882917466, |
|
"grad_norm": 10.482680213963254, |
|
"learning_rate": 3.7992321059122045e-07, |
|
"logits/chosen": -0.7163397073745728, |
|
"logits/rejected": -0.7596901059150696, |
|
"logps/chosen": -418.07147216796875, |
|
"logps/rejected": -498.05078125, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8541247844696045, |
|
"rewards/margins": 0.9185699224472046, |
|
"rewards/rejected": -2.7726948261260986, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.395873320537428, |
|
"grad_norm": 12.021180525729404, |
|
"learning_rate": 3.7812979863228576e-07, |
|
"logits/chosen": -0.8531166911125183, |
|
"logits/rejected": -0.8554477691650391, |
|
"logps/chosen": -381.1147155761719, |
|
"logps/rejected": -508.91668701171875, |
|
"loss": 0.4537, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7566955089569092, |
|
"rewards/margins": 1.1585729122161865, |
|
"rewards/rejected": -2.9152684211730957, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3982725527831094, |
|
"grad_norm": 12.979308903163362, |
|
"learning_rate": 3.763273988770296e-07, |
|
"logits/chosen": -0.7346752285957336, |
|
"logits/rejected": -0.7792466878890991, |
|
"logps/chosen": -394.4084167480469, |
|
"logps/rejected": -528.5557861328125, |
|
"loss": 0.455, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4199392795562744, |
|
"rewards/margins": 1.3389031887054443, |
|
"rewards/rejected": -2.7588424682617188, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4006717850287908, |
|
"grad_norm": 12.280364894112322, |
|
"learning_rate": 3.7451613775663405e-07, |
|
"logits/chosen": -0.7985413670539856, |
|
"logits/rejected": -0.7544962763786316, |
|
"logps/chosen": -410.6094665527344, |
|
"logps/rejected": -567.0599365234375, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7071869373321533, |
|
"rewards/margins": 1.444684624671936, |
|
"rewards/rejected": -3.1518714427948, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 11.808414350786867, |
|
"learning_rate": 3.726961423238706e-07, |
|
"logits/chosen": -0.8868053555488586, |
|
"logits/rejected": -0.9050809741020203, |
|
"logps/chosen": -378.8218078613281, |
|
"logps/rejected": -536.3219604492188, |
|
"loss": 0.4686, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.5447314977645874, |
|
"rewards/margins": 1.3333966732025146, |
|
"rewards/rejected": -2.8781278133392334, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.40547024952015354, |
|
"grad_norm": 12.787716674975018, |
|
"learning_rate": 3.708675402441882e-07, |
|
"logits/chosen": -0.7429116368293762, |
|
"logits/rejected": -0.7805765867233276, |
|
"logps/chosen": -458.7632751464844, |
|
"logps/rejected": -516.9869384765625, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7429218292236328, |
|
"rewards/margins": 0.9082300066947937, |
|
"rewards/rejected": -2.6511518955230713, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.40786948176583493, |
|
"grad_norm": 10.598422088340676, |
|
"learning_rate": 3.6903045978675775e-07, |
|
"logits/chosen": -0.7389672994613647, |
|
"logits/rejected": -0.7964872717857361, |
|
"logps/chosen": -411.5867614746094, |
|
"logps/rejected": -550.23876953125, |
|
"loss": 0.4801, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7794177532196045, |
|
"rewards/margins": 1.554722785949707, |
|
"rewards/rejected": -3.3341403007507324, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4102687140115163, |
|
"grad_norm": 10.761090409994697, |
|
"learning_rate": 3.6718502981547474e-07, |
|
"logits/chosen": -0.7715443968772888, |
|
"logits/rejected": -0.7918425798416138, |
|
"logps/chosen": -430.85003662109375, |
|
"logps/rejected": -561.6787109375, |
|
"loss": 0.4899, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.676509141921997, |
|
"rewards/margins": 0.8913475871086121, |
|
"rewards/rejected": -2.567856788635254, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4126679462571977, |
|
"grad_norm": 9.729723638899472, |
|
"learning_rate": 3.6533137977991986e-07, |
|
"logits/chosen": -0.755111575126648, |
|
"logits/rejected": -0.7594307065010071, |
|
"logps/chosen": -430.3711853027344, |
|
"logps/rejected": -534.5823974609375, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5598688125610352, |
|
"rewards/margins": 0.8007798194885254, |
|
"rewards/rejected": -2.3606486320495605, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41506717850287905, |
|
"grad_norm": 9.372402487840775, |
|
"learning_rate": 3.6346963970627865e-07, |
|
"logits/chosen": -0.6953638195991516, |
|
"logits/rejected": -0.6692907214164734, |
|
"logps/chosen": -390.3316955566406, |
|
"logps/rejected": -514.3018798828125, |
|
"loss": 0.4468, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3828855752944946, |
|
"rewards/margins": 1.066463828086853, |
|
"rewards/rejected": -2.4493489265441895, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.41746641074856045, |
|
"grad_norm": 11.954691591503995, |
|
"learning_rate": 3.615999401882207e-07, |
|
"logits/chosen": -0.8805049657821655, |
|
"logits/rejected": -0.8518358469009399, |
|
"logps/chosen": -412.022216796875, |
|
"logps/rejected": -552.1925659179688, |
|
"loss": 0.4814, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9677274227142334, |
|
"rewards/margins": 1.194000005722046, |
|
"rewards/rejected": -3.1617274284362793, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41986564299424184, |
|
"grad_norm": 11.021704456258432, |
|
"learning_rate": 3.597224123777389e-07, |
|
"logits/chosen": -0.7318686246871948, |
|
"logits/rejected": -0.7280600666999817, |
|
"logps/chosen": -430.3202209472656, |
|
"logps/rejected": -572.8101806640625, |
|
"loss": 0.4749, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8212677240371704, |
|
"rewards/margins": 1.2428979873657227, |
|
"rewards/rejected": -3.0641655921936035, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 10.015819519130572, |
|
"learning_rate": 3.5783718797595e-07, |
|
"logits/chosen": -0.8521868586540222, |
|
"logits/rejected": -0.8556682467460632, |
|
"logps/chosen": -473.8487243652344, |
|
"logps/rejected": -544.9535522460938, |
|
"loss": 0.4797, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8864997625350952, |
|
"rewards/margins": 1.0899362564086914, |
|
"rewards/rejected": -2.976435661315918, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4246641074856046, |
|
"grad_norm": 11.86534189148174, |
|
"learning_rate": 3.559443992238558e-07, |
|
"logits/chosen": -0.7805435061454773, |
|
"logits/rejected": -0.8231045007705688, |
|
"logps/chosen": -400.96697998046875, |
|
"logps/rejected": -577.6618041992188, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5335006713867188, |
|
"rewards/margins": 1.4942717552185059, |
|
"rewards/rejected": -3.0277724266052246, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.42706333973128596, |
|
"grad_norm": 10.141163144169631, |
|
"learning_rate": 3.540441788930673e-07, |
|
"logits/chosen": -0.7176542282104492, |
|
"logits/rejected": -0.7344351410865784, |
|
"logps/chosen": -456.525146484375, |
|
"logps/rejected": -563.7665405273438, |
|
"loss": 0.4548, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6193135976791382, |
|
"rewards/margins": 1.4220505952835083, |
|
"rewards/rejected": -3.0413641929626465, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.42946257197696736, |
|
"grad_norm": 12.961054235399837, |
|
"learning_rate": 3.5213666027649123e-07, |
|
"logits/chosen": -0.7940319180488586, |
|
"logits/rejected": -0.8246362805366516, |
|
"logps/chosen": -489.11004638671875, |
|
"logps/rejected": -525.5428466796875, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1909642219543457, |
|
"rewards/margins": 0.7601931691169739, |
|
"rewards/rejected": -2.951157808303833, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.43186180422264875, |
|
"grad_norm": 11.84758063846055, |
|
"learning_rate": 3.5022197717898017e-07, |
|
"logits/chosen": -0.8181630969047546, |
|
"logits/rejected": -0.8500107526779175, |
|
"logps/chosen": -393.4739685058594, |
|
"logps/rejected": -486.96917724609375, |
|
"loss": 0.4156, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7599443197250366, |
|
"rewards/margins": 1.216341257095337, |
|
"rewards/rejected": -2.976285457611084, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43426103646833014, |
|
"grad_norm": 10.86372215583825, |
|
"learning_rate": 3.4830026390794633e-07, |
|
"logits/chosen": -0.8050792813301086, |
|
"logits/rejected": -0.8329674005508423, |
|
"logps/chosen": -507.2084045410156, |
|
"logps/rejected": -583.702880859375, |
|
"loss": 0.4382, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1328043937683105, |
|
"rewards/margins": 1.2732837200164795, |
|
"rewards/rejected": -3.406088352203369, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.43666026871401153, |
|
"grad_norm": 7.738248672410445, |
|
"learning_rate": 3.4637165526394104e-07, |
|
"logits/chosen": -0.7997580766677856, |
|
"logits/rejected": -0.8064180612564087, |
|
"logps/chosen": -423.777587890625, |
|
"logps/rejected": -542.863525390625, |
|
"loss": 0.4725, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9629684686660767, |
|
"rewards/margins": 1.0524226427078247, |
|
"rewards/rejected": -3.0153908729553223, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43905950095969287, |
|
"grad_norm": 11.736843534921078, |
|
"learning_rate": 3.4443628653119814e-07, |
|
"logits/chosen": -0.7079404592514038, |
|
"logits/rejected": -0.7036377191543579, |
|
"logps/chosen": -483.62896728515625, |
|
"logps/rejected": -678.022216796875, |
|
"loss": 0.4814, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1326420307159424, |
|
"rewards/margins": 1.5004966259002686, |
|
"rewards/rejected": -3.633139133453369, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 10.785415108021422, |
|
"learning_rate": 3.424942934681453e-07, |
|
"logits/chosen": -0.8217443227767944, |
|
"logits/rejected": -0.8721915483474731, |
|
"logps/chosen": -385.87860107421875, |
|
"logps/rejected": -556.9625854492188, |
|
"loss": 0.4584, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.4244273900985718, |
|
"rewards/margins": 1.7936569452285767, |
|
"rewards/rejected": -3.2180843353271484, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44385796545105566, |
|
"grad_norm": 14.588516416025033, |
|
"learning_rate": 3.405458122978804e-07, |
|
"logits/chosen": -0.8775800466537476, |
|
"logits/rejected": -0.8566424250602722, |
|
"logps/chosen": -458.1707458496094, |
|
"logps/rejected": -545.3057861328125, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.7237602472305298, |
|
"rewards/margins": 1.154767632484436, |
|
"rewards/rejected": -2.878527879714966, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.44625719769673705, |
|
"grad_norm": 13.706269723531113, |
|
"learning_rate": 3.3859097969861633e-07, |
|
"logits/chosen": -0.7905477285385132, |
|
"logits/rejected": -0.7530331015586853, |
|
"logps/chosen": -460.21728515625, |
|
"logps/rejected": -553.216796875, |
|
"loss": 0.4504, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8379026651382446, |
|
"rewards/margins": 1.2619093656539917, |
|
"rewards/rejected": -3.0998120307922363, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44865642994241844, |
|
"grad_norm": 13.3769074850146, |
|
"learning_rate": 3.366299327940936e-07, |
|
"logits/chosen": -0.8109074831008911, |
|
"logits/rejected": -0.773389458656311, |
|
"logps/chosen": -470.64947509765625, |
|
"logps/rejected": -587.9104614257812, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7903703451156616, |
|
"rewards/margins": 0.9450393915176392, |
|
"rewards/rejected": -2.7354094982147217, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4510556621880998, |
|
"grad_norm": 11.753551278940575, |
|
"learning_rate": 3.3466280914396117e-07, |
|
"logits/chosen": -0.773266077041626, |
|
"logits/rejected": -0.7779923677444458, |
|
"logps/chosen": -402.0985412597656, |
|
"logps/rejected": -570.60693359375, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6223961114883423, |
|
"rewards/margins": 1.4883135557174683, |
|
"rewards/rejected": -3.1107096672058105, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4534548944337812, |
|
"grad_norm": 14.233604576877488, |
|
"learning_rate": 3.326897467341281e-07, |
|
"logits/chosen": -0.8112883567810059, |
|
"logits/rejected": -0.8336831331253052, |
|
"logps/chosen": -351.8514404296875, |
|
"logps/rejected": -480.0999450683594, |
|
"loss": 0.474, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.529261827468872, |
|
"rewards/margins": 1.1229501962661743, |
|
"rewards/rejected": -2.652211904525757, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.45585412667946257, |
|
"grad_norm": 13.300598272317474, |
|
"learning_rate": 3.3071088396708335e-07, |
|
"logits/chosen": -0.8645914196968079, |
|
"logits/rejected": -0.8331616520881653, |
|
"logps/chosen": -363.18206787109375, |
|
"logps/rejected": -533.6971435546875, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6752821207046509, |
|
"rewards/margins": 1.4778271913528442, |
|
"rewards/rejected": -3.153109550476074, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45825335892514396, |
|
"grad_norm": 12.640086996532164, |
|
"learning_rate": 3.2872635965218824e-07, |
|
"logits/chosen": -0.6304786205291748, |
|
"logits/rejected": -0.6618056297302246, |
|
"logps/chosen": -448.3641052246094, |
|
"logps/rejected": -589.9913330078125, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9804298877716064, |
|
"rewards/margins": 1.1321611404418945, |
|
"rewards/rejected": -3.112590789794922, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 9.986496934257348, |
|
"learning_rate": 3.2673631299593905e-07, |
|
"logits/chosen": -0.7311594486236572, |
|
"logits/rejected": -0.8009797930717468, |
|
"logps/chosen": -461.6552734375, |
|
"logps/rejected": -577.480712890625, |
|
"loss": 0.4668, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8996919393539429, |
|
"rewards/margins": 1.237914800643921, |
|
"rewards/rejected": -3.1376068592071533, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4630518234165067, |
|
"grad_norm": 13.218071975981442, |
|
"learning_rate": 3.247408835922024e-07, |
|
"logits/chosen": -0.7413262724876404, |
|
"logits/rejected": -0.7506011724472046, |
|
"logps/chosen": -488.389404296875, |
|
"logps/rejected": -615.5206298828125, |
|
"loss": 0.4835, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.845880150794983, |
|
"rewards/margins": 1.2004640102386475, |
|
"rewards/rejected": -3.046344041824341, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4654510556621881, |
|
"grad_norm": 12.635690431262567, |
|
"learning_rate": 3.2274021141242306e-07, |
|
"logits/chosen": -0.6942049860954285, |
|
"logits/rejected": -0.7160819172859192, |
|
"logps/chosen": -423.4578552246094, |
|
"logps/rejected": -537.9940795898438, |
|
"loss": 0.4495, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6584323644638062, |
|
"rewards/margins": 1.0953803062438965, |
|
"rewards/rejected": -2.753812551498413, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4678502879078695, |
|
"grad_norm": 15.405837310763367, |
|
"learning_rate": 3.2073443679580613e-07, |
|
"logits/chosen": -0.7654497027397156, |
|
"logits/rejected": -0.7917548418045044, |
|
"logps/chosen": -448.06378173828125, |
|
"logps/rejected": -538.6315307617188, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.71183180809021, |
|
"rewards/margins": 0.9023053050041199, |
|
"rewards/rejected": -2.614137649536133, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.47024952015355087, |
|
"grad_norm": 9.857507863821315, |
|
"learning_rate": 3.1872370043947194e-07, |
|
"logits/chosen": -0.8800600171089172, |
|
"logits/rejected": -0.888513445854187, |
|
"logps/chosen": -407.2889709472656, |
|
"logps/rejected": -570.4630126953125, |
|
"loss": 0.4494, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3753950595855713, |
|
"rewards/margins": 1.693584680557251, |
|
"rewards/rejected": -3.0689799785614014, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47264875239923226, |
|
"grad_norm": 11.49826557648692, |
|
"learning_rate": 3.167081433885874e-07, |
|
"logits/chosen": -0.6018909215927124, |
|
"logits/rejected": -0.6167675852775574, |
|
"logps/chosen": -508.90655517578125, |
|
"logps/rejected": -644.7164916992188, |
|
"loss": 0.4066, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8501713275909424, |
|
"rewards/margins": 0.9607030749320984, |
|
"rewards/rejected": -2.8108744621276855, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4750479846449136, |
|
"grad_norm": 13.29052258173837, |
|
"learning_rate": 3.14687907026472e-07, |
|
"logits/chosen": -0.6973208785057068, |
|
"logits/rejected": -0.7304965257644653, |
|
"logps/chosen": -395.8529357910156, |
|
"logps/rejected": -551.4783935546875, |
|
"loss": 0.4563, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.6641004085540771, |
|
"rewards/margins": 1.4298467636108398, |
|
"rewards/rejected": -3.093946933746338, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.477447216890595, |
|
"grad_norm": 12.240475995337231, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": -0.699920117855072, |
|
"logits/rejected": -0.7062256932258606, |
|
"logps/chosen": -519.5510864257812, |
|
"logps/rejected": -607.6595458984375, |
|
"loss": 0.4849, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.1099679470062256, |
|
"rewards/margins": 0.9010626077651978, |
|
"rewards/rejected": -3.011030673980713, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 13.241550075194965, |
|
"learning_rate": 3.1063396353306097e-07, |
|
"logits/chosen": -0.726953387260437, |
|
"logits/rejected": -0.788418173789978, |
|
"logps/chosen": -436.1697692871094, |
|
"logps/rejected": -513.4632568359375, |
|
"loss": 0.4416, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.6597273349761963, |
|
"rewards/margins": 1.236422061920166, |
|
"rewards/rejected": -2.896149158477783, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"eval_logits/chosen": -0.7618333697319031, |
|
"eval_logits/rejected": -0.7765002846717834, |
|
"eval_logps/chosen": -424.0602111816406, |
|
"eval_logps/rejected": -575.2555541992188, |
|
"eval_loss": 0.45312055945396423, |
|
"eval_rewards/accuracies": 0.8160714507102966, |
|
"eval_rewards/chosen": -1.7141555547714233, |
|
"eval_rewards/margins": 1.430633306503296, |
|
"eval_rewards/rejected": -3.144789457321167, |
|
"eval_runtime": 233.581, |
|
"eval_samples_per_second": 19.098, |
|
"eval_steps_per_second": 0.3, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4822456813819578, |
|
"grad_norm": 11.1357246980386, |
|
"learning_rate": 3.0860054076979535e-07, |
|
"logits/chosen": -0.7470555901527405, |
|
"logits/rejected": -0.74172443151474, |
|
"logps/chosen": -460.82867431640625, |
|
"logps/rejected": -552.6365356445312, |
|
"loss": 0.4743, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9067308902740479, |
|
"rewards/margins": 1.165321946144104, |
|
"rewards/rejected": -3.0720529556274414, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4846449136276392, |
|
"grad_norm": 17.785449989220815, |
|
"learning_rate": 3.065630074114115e-07, |
|
"logits/chosen": -0.7443466782569885, |
|
"logits/rejected": -0.762556254863739, |
|
"logps/chosen": -465.6786193847656, |
|
"logps/rejected": -580.1879272460938, |
|
"loss": 0.4669, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7899072170257568, |
|
"rewards/margins": 1.538434624671936, |
|
"rewards/rejected": -3.3283417224884033, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4870441458733205, |
|
"grad_norm": 16.171594176924803, |
|
"learning_rate": 3.0452150638277947e-07, |
|
"logits/chosen": -0.6747657060623169, |
|
"logits/rejected": -0.6558694839477539, |
|
"logps/chosen": -392.1339416503906, |
|
"logps/rejected": -500.20025634765625, |
|
"loss": 0.5029, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.720434546470642, |
|
"rewards/margins": 0.9752155542373657, |
|
"rewards/rejected": -2.695650339126587, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4894433781190019, |
|
"grad_norm": 8.227849718874795, |
|
"learning_rate": 3.024761808870856e-07, |
|
"logits/chosen": -0.7772229313850403, |
|
"logits/rejected": -0.7858240008354187, |
|
"logps/chosen": -381.36236572265625, |
|
"logps/rejected": -597.6624145507812, |
|
"loss": 0.4504, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.510353684425354, |
|
"rewards/margins": 2.1284565925598145, |
|
"rewards/rejected": -3.6388099193573, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4918426103646833, |
|
"grad_norm": 20.051647299898132, |
|
"learning_rate": 3.004271743957875e-07, |
|
"logits/chosen": -0.6749522089958191, |
|
"logits/rejected": -0.6905564069747925, |
|
"logps/chosen": -484.854736328125, |
|
"logps/rejected": -604.3311767578125, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.175485134124756, |
|
"rewards/margins": 0.9892854690551758, |
|
"rewards/rejected": -3.1647706031799316, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4942418426103647, |
|
"grad_norm": 12.004150798678578, |
|
"learning_rate": 2.983746306385499e-07, |
|
"logits/chosen": -0.8129485845565796, |
|
"logits/rejected": -0.7655819654464722, |
|
"logps/chosen": -414.5586853027344, |
|
"logps/rejected": -578.7470703125, |
|
"loss": 0.4453, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7315165996551514, |
|
"rewards/margins": 1.4258909225463867, |
|
"rewards/rejected": -3.1574079990386963, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4966410748560461, |
|
"grad_norm": 11.308824836556166, |
|
"learning_rate": 2.963186935931628e-07, |
|
"logits/chosen": -0.7485088109970093, |
|
"logits/rejected": -0.7199539542198181, |
|
"logps/chosen": -450.9239196777344, |
|
"logps/rejected": -583.173583984375, |
|
"loss": 0.4482, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.732980728149414, |
|
"rewards/margins": 1.3427022695541382, |
|
"rewards/rejected": -3.075683116912842, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 9.373923594426847, |
|
"learning_rate": 2.9425950747544176e-07, |
|
"logits/chosen": -0.6613216400146484, |
|
"logits/rejected": -0.7013477683067322, |
|
"logps/chosen": -505.81573486328125, |
|
"logps/rejected": -642.0211181640625, |
|
"loss": 0.4343, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.986654281616211, |
|
"rewards/margins": 1.6596873998641968, |
|
"rewards/rejected": -3.646341323852539, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5014395393474088, |
|
"grad_norm": 15.749741167722831, |
|
"learning_rate": 2.921972167291119e-07, |
|
"logits/chosen": -0.7112385630607605, |
|
"logits/rejected": -0.7526477575302124, |
|
"logps/chosen": -436.2796325683594, |
|
"logps/rejected": -586.3831787109375, |
|
"loss": 0.4399, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5150210857391357, |
|
"rewards/margins": 1.2452566623687744, |
|
"rewards/rejected": -2.760277271270752, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5038387715930902, |
|
"grad_norm": 13.419312414930658, |
|
"learning_rate": 2.9013196601567567e-07, |
|
"logits/chosen": -0.7128900289535522, |
|
"logits/rejected": -0.730756402015686, |
|
"logps/chosen": -419.7554626464844, |
|
"logps/rejected": -551.4823608398438, |
|
"loss": 0.5299, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.718276023864746, |
|
"rewards/margins": 1.1899365186691284, |
|
"rewards/rejected": -2.908212661743164, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5062380038387716, |
|
"grad_norm": 9.651741347788393, |
|
"learning_rate": 2.8806390020426555e-07, |
|
"logits/chosen": -0.7773910760879517, |
|
"logits/rejected": -0.7731062173843384, |
|
"logps/chosen": -431.44317626953125, |
|
"logps/rejected": -574.38720703125, |
|
"loss": 0.4257, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.687414526939392, |
|
"rewards/margins": 1.4306485652923584, |
|
"rewards/rejected": -3.11806321144104, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.508637236084453, |
|
"grad_norm": 13.48177862842598, |
|
"learning_rate": 2.8599316436148187e-07, |
|
"logits/chosen": -0.6940504312515259, |
|
"logits/rejected": -0.6836844682693481, |
|
"logps/chosen": -445.3380432128906, |
|
"logps/rejected": -537.2091064453125, |
|
"loss": 0.4344, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9495445489883423, |
|
"rewards/margins": 0.9152109026908875, |
|
"rewards/rejected": -2.864755153656006, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5110364683301344, |
|
"grad_norm": 13.289305185609427, |
|
"learning_rate": 2.8391990374121723e-07, |
|
"logits/chosen": -0.726272463798523, |
|
"logits/rejected": -0.7124502062797546, |
|
"logps/chosen": -440.59832763671875, |
|
"logps/rejected": -601.8394775390625, |
|
"loss": 0.4771, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0392353534698486, |
|
"rewards/margins": 1.3474972248077393, |
|
"rewards/rejected": -3.386732816696167, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5134357005758158, |
|
"grad_norm": 12.7036271351969, |
|
"learning_rate": 2.818442637744669e-07, |
|
"logits/chosen": -0.7679746747016907, |
|
"logits/rejected": -0.7643837332725525, |
|
"logps/chosen": -455.5375061035156, |
|
"logps/rejected": -597.4730224609375, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0901169776916504, |
|
"rewards/margins": 1.3490521907806396, |
|
"rewards/rejected": -3.439169406890869, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5158349328214972, |
|
"grad_norm": 10.384572302471735, |
|
"learning_rate": 2.797663900591284e-07, |
|
"logits/chosen": -0.7398999333381653, |
|
"logits/rejected": -0.7659087181091309, |
|
"logps/chosen": -447.51593017578125, |
|
"logps/rejected": -535.8583984375, |
|
"loss": 0.4256, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8267303705215454, |
|
"rewards/margins": 1.21419358253479, |
|
"rewards/rejected": -3.040923833847046, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 12.512282425185987, |
|
"learning_rate": 2.776864283497874e-07, |
|
"logits/chosen": -0.7339873909950256, |
|
"logits/rejected": -0.7800690531730652, |
|
"logps/chosen": -417.85107421875, |
|
"logps/rejected": -591.3509521484375, |
|
"loss": 0.4684, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8667776584625244, |
|
"rewards/margins": 1.7389957904815674, |
|
"rewards/rejected": -3.6057732105255127, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5206333973128598, |
|
"grad_norm": 9.865835212510957, |
|
"learning_rate": 2.756045245474943e-07, |
|
"logits/chosen": -0.6862331628799438, |
|
"logits/rejected": -0.6869142651557922, |
|
"logps/chosen": -441.952880859375, |
|
"logps/rejected": -546.9207153320312, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6532140970230103, |
|
"rewards/margins": 0.8384655714035034, |
|
"rewards/rejected": -2.4916796684265137, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5230326295585412, |
|
"grad_norm": 16.184183395491957, |
|
"learning_rate": 2.7352082468952977e-07, |
|
"logits/chosen": -0.7555044889450073, |
|
"logits/rejected": -0.7893722057342529, |
|
"logps/chosen": -431.03448486328125, |
|
"logps/rejected": -618.13427734375, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9443515539169312, |
|
"rewards/margins": 1.6091206073760986, |
|
"rewards/rejected": -3.5534720420837402, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5254318618042226, |
|
"grad_norm": 14.810315270160304, |
|
"learning_rate": 2.7143547493916e-07, |
|
"logits/chosen": -0.7784820795059204, |
|
"logits/rejected": -0.7843117117881775, |
|
"logps/chosen": -398.50506591796875, |
|
"logps/rejected": -604.5455322265625, |
|
"loss": 0.4501, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4673595428466797, |
|
"rewards/margins": 2.0520424842834473, |
|
"rewards/rejected": -3.519402027130127, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.527831094049904, |
|
"grad_norm": 14.870480901797459, |
|
"learning_rate": 2.693486215753853e-07, |
|
"logits/chosen": -0.7710849046707153, |
|
"logits/rejected": -0.7755874395370483, |
|
"logps/chosen": -432.92803955078125, |
|
"logps/rejected": -596.2999877929688, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9311811923980713, |
|
"rewards/margins": 1.8095871210098267, |
|
"rewards/rejected": -3.7407684326171875, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5302303262955854, |
|
"grad_norm": 15.695210321976734, |
|
"learning_rate": 2.6726041098267805e-07, |
|
"logits/chosen": -0.8125902414321899, |
|
"logits/rejected": -0.8185180425643921, |
|
"logps/chosen": -507.8814392089844, |
|
"logps/rejected": -558.718994140625, |
|
"loss": 0.4788, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2443294525146484, |
|
"rewards/margins": 0.8388055562973022, |
|
"rewards/rejected": -3.083134889602661, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5326295585412668, |
|
"grad_norm": 16.11416540421591, |
|
"learning_rate": 2.6517098964071507e-07, |
|
"logits/chosen": -0.7117936015129089, |
|
"logits/rejected": -0.7254031896591187, |
|
"logps/chosen": -454.80242919921875, |
|
"logps/rejected": -549.0224609375, |
|
"loss": 0.511, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9632365703582764, |
|
"rewards/margins": 0.8028262257575989, |
|
"rewards/rejected": -2.7660632133483887, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5350287907869482, |
|
"grad_norm": 13.538871078171768, |
|
"learning_rate": 2.630805041141023e-07, |
|
"logits/chosen": -0.7859424352645874, |
|
"logits/rejected": -0.7693944573402405, |
|
"logps/chosen": -402.9466857910156, |
|
"logps/rejected": -582.7056884765625, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7725486755371094, |
|
"rewards/margins": 1.6191837787628174, |
|
"rewards/rejected": -3.3917324542999268, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 14.707908117003884, |
|
"learning_rate": 2.609891010420941e-07, |
|
"logits/chosen": -0.8051595687866211, |
|
"logits/rejected": -0.7927287817001343, |
|
"logps/chosen": -450.76519775390625, |
|
"logps/rejected": -590.6575317382812, |
|
"loss": 0.4335, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.906309723854065, |
|
"rewards/margins": 1.394263744354248, |
|
"rewards/rejected": -3.3005733489990234, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.539827255278311, |
|
"grad_norm": 13.589410553380741, |
|
"learning_rate": 2.5889692712830674e-07, |
|
"logits/chosen": -0.7646986246109009, |
|
"logits/rejected": -0.7824346423149109, |
|
"logps/chosen": -391.0314025878906, |
|
"logps/rejected": -513.9915771484375, |
|
"loss": 0.4367, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.665148377418518, |
|
"rewards/margins": 1.241813063621521, |
|
"rewards/rejected": -2.90696120262146, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5422264875239923, |
|
"grad_norm": 13.492840980385337, |
|
"learning_rate": 2.5680412913042843e-07, |
|
"logits/chosen": -0.7536166906356812, |
|
"logits/rejected": -0.7371748685836792, |
|
"logps/chosen": -442.3362731933594, |
|
"logps/rejected": -595.1715087890625, |
|
"loss": 0.4245, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0182454586029053, |
|
"rewards/margins": 1.4872602224349976, |
|
"rewards/rejected": -3.5055058002471924, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5446257197696737, |
|
"grad_norm": 14.894715683363456, |
|
"learning_rate": 2.5471085384992404e-07, |
|
"logits/chosen": -0.8191806674003601, |
|
"logits/rejected": -0.806193470954895, |
|
"logps/chosen": -411.95147705078125, |
|
"logps/rejected": -647.9240112304688, |
|
"loss": 0.421, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7768102884292603, |
|
"rewards/margins": 2.2000908851623535, |
|
"rewards/rejected": -3.9769012928009033, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5470249520153551, |
|
"grad_norm": 11.287950370775208, |
|
"learning_rate": 2.526172481217381e-07, |
|
"logits/chosen": -0.7717296481132507, |
|
"logits/rejected": -0.7522517442703247, |
|
"logps/chosen": -446.72607421875, |
|
"logps/rejected": -596.8885498046875, |
|
"loss": 0.4738, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.3675601482391357, |
|
"rewards/margins": 1.335943579673767, |
|
"rewards/rejected": -3.703503370285034, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5494241842610365, |
|
"grad_norm": 17.56101375995895, |
|
"learning_rate": 2.5052345880399456e-07, |
|
"logits/chosen": -0.8398829698562622, |
|
"logits/rejected": -0.8727308511734009, |
|
"logps/chosen": -449.156982421875, |
|
"logps/rejected": -570.5628662109375, |
|
"loss": 0.434, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.23502779006958, |
|
"rewards/margins": 1.1653441190719604, |
|
"rewards/rejected": -3.400371551513672, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5518234165067178, |
|
"grad_norm": 11.302567518557918, |
|
"learning_rate": 2.4842963276769555e-07, |
|
"logits/chosen": -0.7016631364822388, |
|
"logits/rejected": -0.6639502048492432, |
|
"logps/chosen": -437.28399658203125, |
|
"logps/rejected": -608.0369873046875, |
|
"loss": 0.4484, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.180513858795166, |
|
"rewards/margins": 1.2660033702850342, |
|
"rewards/rejected": -3.446516752243042, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5542226487523992, |
|
"grad_norm": 11.742895780710647, |
|
"learning_rate": 2.463359168864189e-07, |
|
"logits/chosen": -0.6675876379013062, |
|
"logits/rejected": -0.7527247667312622, |
|
"logps/chosen": -516.6282958984375, |
|
"logps/rejected": -603.8510131835938, |
|
"loss": 0.4765, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1180880069732666, |
|
"rewards/margins": 1.259250283241272, |
|
"rewards/rejected": -3.377338409423828, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 15.611988191868031, |
|
"learning_rate": 2.4424245802601555e-07, |
|
"logits/chosen": -0.7548837065696716, |
|
"logits/rejected": -0.7677526473999023, |
|
"logps/chosen": -436.63763427734375, |
|
"logps/rejected": -580.12109375, |
|
"loss": 0.448, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0357186794281006, |
|
"rewards/margins": 0.8878978490829468, |
|
"rewards/rejected": -2.923617124557495, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.559021113243762, |
|
"grad_norm": 11.963131637466097, |
|
"learning_rate": 2.421494030343072e-07, |
|
"logits/chosen": -0.6403541564941406, |
|
"logits/rejected": -0.6971467137336731, |
|
"logps/chosen": -463.41668701171875, |
|
"logps/rejected": -511.3497009277344, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.029705047607422, |
|
"rewards/margins": 0.9956240653991699, |
|
"rewards/rejected": -3.025329113006592, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5614203454894434, |
|
"grad_norm": 12.457563977370029, |
|
"learning_rate": 2.400568987307861e-07, |
|
"logits/chosen": -0.6719120740890503, |
|
"logits/rejected": -0.6879553198814392, |
|
"logps/chosen": -428.35992431640625, |
|
"logps/rejected": -484.6227111816406, |
|
"loss": 0.4224, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0458950996398926, |
|
"rewards/margins": 0.6281440258026123, |
|
"rewards/rejected": -2.674039363861084, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5638195777351248, |
|
"grad_norm": 11.92177172958945, |
|
"learning_rate": 2.379650918963156e-07, |
|
"logits/chosen": -0.7526946663856506, |
|
"logits/rejected": -0.7413941025733948, |
|
"logps/chosen": -406.4283447265625, |
|
"logps/rejected": -570.8126220703125, |
|
"loss": 0.4205, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.06895112991333, |
|
"rewards/margins": 1.500595211982727, |
|
"rewards/rejected": -3.5695462226867676, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5662188099808061, |
|
"grad_norm": 16.624039363219666, |
|
"learning_rate": 2.3587412926283438e-07, |
|
"logits/chosen": -0.7837602496147156, |
|
"logits/rejected": -0.7445356249809265, |
|
"logps/chosen": -491.05059814453125, |
|
"logps/rejected": -638.3920288085938, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9546314477920532, |
|
"rewards/margins": 1.7906033992767334, |
|
"rewards/rejected": -3.745234966278076, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5686180422264875, |
|
"grad_norm": 9.978773646984067, |
|
"learning_rate": 2.337841575030642e-07, |
|
"logits/chosen": -0.6353663802146912, |
|
"logits/rejected": -0.670727014541626, |
|
"logps/chosen": -482.7936096191406, |
|
"logps/rejected": -629.0820922851562, |
|
"loss": 0.4658, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9853990077972412, |
|
"rewards/margins": 1.31783127784729, |
|
"rewards/rejected": -3.3032302856445312, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5710172744721689, |
|
"grad_norm": 10.175503774195834, |
|
"learning_rate": 2.316953232202206e-07, |
|
"logits/chosen": -0.6700790524482727, |
|
"logits/rejected": -0.7611835598945618, |
|
"logps/chosen": -429.82666015625, |
|
"logps/rejected": -465.277099609375, |
|
"loss": 0.423, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9541041851043701, |
|
"rewards/margins": 0.9658061861991882, |
|
"rewards/rejected": -2.9199106693267822, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5734165067178503, |
|
"grad_norm": 11.211484923712767, |
|
"learning_rate": 2.2960777293772958e-07, |
|
"logits/chosen": -0.6513696908950806, |
|
"logits/rejected": -0.6979095339775085, |
|
"logps/chosen": -405.17120361328125, |
|
"logps/rejected": -547.6156005859375, |
|
"loss": 0.4611, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.898032546043396, |
|
"rewards/margins": 1.5655709505081177, |
|
"rewards/rejected": -3.4636034965515137, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 10.432280326904475, |
|
"learning_rate": 2.2752165308894974e-07, |
|
"logits/chosen": -0.6997479200363159, |
|
"logits/rejected": -0.6795819401741028, |
|
"logps/chosen": -405.59454345703125, |
|
"logps/rejected": -530.2125244140625, |
|
"loss": 0.4443, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1597695350646973, |
|
"rewards/margins": 1.3249900341033936, |
|
"rewards/rejected": -3.484759569168091, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5782149712092131, |
|
"grad_norm": 16.8329792009058, |
|
"learning_rate": 2.254371100069005e-07, |
|
"logits/chosen": -0.6621764898300171, |
|
"logits/rejected": -0.612916111946106, |
|
"logps/chosen": -440.20538330078125, |
|
"logps/rejected": -585.4799194335938, |
|
"loss": 0.4638, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.897138237953186, |
|
"rewards/margins": 1.18193781375885, |
|
"rewards/rejected": -3.0790762901306152, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5806142034548945, |
|
"grad_norm": 16.7833808451815, |
|
"learning_rate": 2.2335428991399725e-07, |
|
"logits/chosen": -0.6124377846717834, |
|
"logits/rejected": -0.6342424154281616, |
|
"logps/chosen": -424.91595458984375, |
|
"logps/rejected": -682.1309814453125, |
|
"loss": 0.4579, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.3315539360046387, |
|
"rewards/margins": 2.402623414993286, |
|
"rewards/rejected": -4.734177112579346, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5830134357005758, |
|
"grad_norm": 11.37683798430042, |
|
"learning_rate": 2.2127333891179458e-07, |
|
"logits/chosen": -0.6946436166763306, |
|
"logits/rejected": -0.7337637543678284, |
|
"logps/chosen": -401.97235107421875, |
|
"logps/rejected": -590.7584228515625, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.964185118675232, |
|
"rewards/margins": 1.5734838247299194, |
|
"rewards/rejected": -3.5376694202423096, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5854126679462572, |
|
"grad_norm": 14.137637654420558, |
|
"learning_rate": 2.1919440297073782e-07, |
|
"logits/chosen": -0.6848565340042114, |
|
"logits/rejected": -0.7031614184379578, |
|
"logps/chosen": -389.65863037109375, |
|
"logps/rejected": -572.2056884765625, |
|
"loss": 0.4726, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.85574471950531, |
|
"rewards/margins": 1.6927335262298584, |
|
"rewards/rejected": -3.548478364944458, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5878119001919386, |
|
"grad_norm": 11.976818733935065, |
|
"learning_rate": 2.1711762791992368e-07, |
|
"logits/chosen": -0.6271128058433533, |
|
"logits/rejected": -0.6209608316421509, |
|
"logps/chosen": -453.7582092285156, |
|
"logps/rejected": -558.2598876953125, |
|
"loss": 0.464, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6763954162597656, |
|
"rewards/margins": 1.3264000415802002, |
|
"rewards/rejected": -3.002795696258545, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5902111324376199, |
|
"grad_norm": 12.353087005836548, |
|
"learning_rate": 2.1504315943687114e-07, |
|
"logits/chosen": -0.7643033862113953, |
|
"logits/rejected": -0.736054539680481, |
|
"logps/chosen": -416.31512451171875, |
|
"logps/rejected": -612.2337646484375, |
|
"loss": 0.4454, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.881116509437561, |
|
"rewards/margins": 1.4656778573989868, |
|
"rewards/rejected": -3.346794605255127, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5926103646833013, |
|
"grad_norm": 15.676634180170838, |
|
"learning_rate": 2.1297114303730248e-07, |
|
"logits/chosen": -0.6767653226852417, |
|
"logits/rejected": -0.6186730265617371, |
|
"logps/chosen": -407.5095520019531, |
|
"logps/rejected": -585.6527709960938, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.816948652267456, |
|
"rewards/margins": 1.2787069082260132, |
|
"rewards/rejected": -3.0956554412841797, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 12.693263479126406, |
|
"learning_rate": 2.1090172406493616e-07, |
|
"logits/chosen": -0.6466863751411438, |
|
"logits/rejected": -0.6392595171928406, |
|
"logps/chosen": -390.559326171875, |
|
"logps/rejected": -549.0421142578125, |
|
"loss": 0.4125, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5861185789108276, |
|
"rewards/margins": 1.4114429950714111, |
|
"rewards/rejected": -2.997561454772949, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5974088291746641, |
|
"grad_norm": 14.530343733322098, |
|
"learning_rate": 2.0883504768129146e-07, |
|
"logits/chosen": -0.7057468295097351, |
|
"logits/rejected": -0.7234060764312744, |
|
"logps/chosen": -470.16229248046875, |
|
"logps/rejected": -603.843994140625, |
|
"loss": 0.478, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9950097799301147, |
|
"rewards/margins": 1.3909389972686768, |
|
"rewards/rejected": -3.385948896408081, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5998080614203455, |
|
"grad_norm": 13.287779534586859, |
|
"learning_rate": 2.0677125885550571e-07, |
|
"logits/chosen": -0.5642179846763611, |
|
"logits/rejected": -0.6275255084037781, |
|
"logps/chosen": -422.0602111816406, |
|
"logps/rejected": -509.85833740234375, |
|
"loss": 0.4554, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8453181982040405, |
|
"rewards/margins": 1.3027547597885132, |
|
"rewards/rejected": -3.1480727195739746, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6022072936660269, |
|
"grad_norm": 15.517152991224327, |
|
"learning_rate": 2.0471050235416587e-07, |
|
"logits/chosen": -0.6314137578010559, |
|
"logits/rejected": -0.7092536091804504, |
|
"logps/chosen": -456.7703552246094, |
|
"logps/rejected": -560.7614135742188, |
|
"loss": 0.4175, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.005941390991211, |
|
"rewards/margins": 1.4926408529281616, |
|
"rewards/rejected": -3.498582363128662, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6046065259117083, |
|
"grad_norm": 15.430444876775148, |
|
"learning_rate": 2.026529227311532e-07, |
|
"logits/chosen": -0.696445107460022, |
|
"logits/rejected": -0.6817110776901245, |
|
"logps/chosen": -419.6112365722656, |
|
"logps/rejected": -545.1299438476562, |
|
"loss": 0.4725, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.074556827545166, |
|
"rewards/margins": 1.122429609298706, |
|
"rewards/rejected": -3.196986436843872, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6070057581573897, |
|
"grad_norm": 12.135680251339695, |
|
"learning_rate": 2.005986643175036e-07, |
|
"logits/chosen": -0.6033408045768738, |
|
"logits/rejected": -0.5708281993865967, |
|
"logps/chosen": -444.126220703125, |
|
"logps/rejected": -621.4119873046875, |
|
"loss": 0.3872, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6977285146713257, |
|
"rewards/margins": 1.8118762969970703, |
|
"rewards/rejected": -3.5096049308776855, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6094049904030711, |
|
"grad_norm": 16.900656042802993, |
|
"learning_rate": 1.9854787121128328e-07, |
|
"logits/chosen": -0.6752146482467651, |
|
"logits/rejected": -0.6918280124664307, |
|
"logps/chosen": -413.39178466796875, |
|
"logps/rejected": -453.151123046875, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9585962295532227, |
|
"rewards/margins": 0.8429145812988281, |
|
"rewards/rejected": -2.8015105724334717, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6118042226487524, |
|
"grad_norm": 14.460169065470863, |
|
"learning_rate": 1.9650068726748106e-07, |
|
"logits/chosen": -0.6060270667076111, |
|
"logits/rejected": -0.6441822052001953, |
|
"logps/chosen": -460.1835021972656, |
|
"logps/rejected": -609.755126953125, |
|
"loss": 0.4705, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.047046184539795, |
|
"rewards/margins": 1.4443639516830444, |
|
"rewards/rejected": -3.4914097785949707, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 14.47008564939435, |
|
"learning_rate": 1.9445725608791718e-07, |
|
"logits/chosen": -0.6000035405158997, |
|
"logits/rejected": -0.6268490552902222, |
|
"logps/chosen": -412.12322998046875, |
|
"logps/rejected": -622.6077270507812, |
|
"loss": 0.4549, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5705702304840088, |
|
"rewards/margins": 2.0281901359558105, |
|
"rewards/rejected": -3.5987606048583984, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6166026871401151, |
|
"grad_norm": 16.15106890491746, |
|
"learning_rate": 1.924177210111705e-07, |
|
"logits/chosen": -0.6954480409622192, |
|
"logits/rejected": -0.7049099206924438, |
|
"logps/chosen": -394.63970947265625, |
|
"logps/rejected": -582.3255004882812, |
|
"loss": 0.4508, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.777197241783142, |
|
"rewards/margins": 1.7624857425689697, |
|
"rewards/rejected": -3.5396828651428223, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6190019193857965, |
|
"grad_norm": 11.273315658642467, |
|
"learning_rate": 1.9038222510252364e-07, |
|
"logits/chosen": -0.6852430105209351, |
|
"logits/rejected": -0.6738708019256592, |
|
"logps/chosen": -441.40020751953125, |
|
"logps/rejected": -517.69921875, |
|
"loss": 0.4767, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9201513528823853, |
|
"rewards/margins": 0.8821185231208801, |
|
"rewards/rejected": -2.8022701740264893, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6214011516314779, |
|
"grad_norm": 13.419885609170674, |
|
"learning_rate": 1.883509111439277e-07, |
|
"logits/chosen": -0.5949097275733948, |
|
"logits/rejected": -0.5894945859909058, |
|
"logps/chosen": -434.9554138183594, |
|
"logps/rejected": -670.6376953125, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0458106994628906, |
|
"rewards/margins": 1.706055998802185, |
|
"rewards/rejected": -3.7518672943115234, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6238003838771593, |
|
"grad_norm": 10.25337511089072, |
|
"learning_rate": 1.8632392162398665e-07, |
|
"logits/chosen": -0.6396509408950806, |
|
"logits/rejected": -0.6442984342575073, |
|
"logps/chosen": -447.4217834472656, |
|
"logps/rejected": -649.7928466796875, |
|
"loss": 0.4354, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.6608057022094727, |
|
"rewards/margins": 2.017603874206543, |
|
"rewards/rejected": -3.6784095764160156, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6261996161228407, |
|
"grad_norm": 12.615964200890309, |
|
"learning_rate": 1.84301398727962e-07, |
|
"logits/chosen": -0.6086243391036987, |
|
"logits/rejected": -0.5457442998886108, |
|
"logps/chosen": -363.9615173339844, |
|
"logps/rejected": -625.5221557617188, |
|
"loss": 0.4274, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8102554082870483, |
|
"rewards/margins": 2.2328672409057617, |
|
"rewards/rejected": -4.0431227684021, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6285988483685221, |
|
"grad_norm": 13.561387993292696, |
|
"learning_rate": 1.8228348432779966e-07, |
|
"logits/chosen": -0.6876164078712463, |
|
"logits/rejected": -0.6831247210502625, |
|
"logps/chosen": -427.48388671875, |
|
"logps/rejected": -554.5697631835938, |
|
"loss": 0.5102, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0380465984344482, |
|
"rewards/margins": 1.3742176294326782, |
|
"rewards/rejected": -3.412264347076416, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6309980806142035, |
|
"grad_norm": 11.884741198970012, |
|
"learning_rate": 1.8027031997217773e-07, |
|
"logits/chosen": -0.7005245089530945, |
|
"logits/rejected": -0.7127174139022827, |
|
"logps/chosen": -418.88031005859375, |
|
"logps/rejected": -650.9946899414062, |
|
"loss": 0.3794, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1531500816345215, |
|
"rewards/margins": 2.098936080932617, |
|
"rewards/rejected": -4.2520856857299805, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 13.756259667587228, |
|
"learning_rate": 1.7826204687657758e-07, |
|
"logits/chosen": -0.6128356456756592, |
|
"logits/rejected": -0.5883530378341675, |
|
"logps/chosen": -479.2039489746094, |
|
"logps/rejected": -541.1580200195312, |
|
"loss": 0.4189, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0355639457702637, |
|
"rewards/margins": 1.0127185583114624, |
|
"rewards/rejected": -3.0482823848724365, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6357965451055663, |
|
"grad_norm": 16.334230649896075, |
|
"learning_rate": 1.762588059133781e-07, |
|
"logits/chosen": -0.5741305947303772, |
|
"logits/rejected": -0.5979640483856201, |
|
"logps/chosen": -492.15985107421875, |
|
"logps/rejected": -616.077880859375, |
|
"loss": 0.4344, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.001828670501709, |
|
"rewards/margins": 1.6204664707183838, |
|
"rewards/rejected": -3.6222949028015137, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6381957773512476, |
|
"grad_norm": 12.818149694157945, |
|
"learning_rate": 1.7426073760197406e-07, |
|
"logits/chosen": -0.7115119099617004, |
|
"logits/rejected": -0.7030835151672363, |
|
"logps/chosen": -436.45440673828125, |
|
"logps/rejected": -656.4097900390625, |
|
"loss": 0.464, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0036306381225586, |
|
"rewards/margins": 1.860400915145874, |
|
"rewards/rejected": -3.864032030105591, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6405950095969289, |
|
"grad_norm": 10.772055196711287, |
|
"learning_rate": 1.7226798209891935e-07, |
|
"logits/chosen": -0.5705487132072449, |
|
"logits/rejected": -0.6100784540176392, |
|
"logps/chosen": -454.169677734375, |
|
"logps/rejected": -567.9891967773438, |
|
"loss": 0.4243, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.027036666870117, |
|
"rewards/margins": 1.6898279190063477, |
|
"rewards/rejected": -3.716864824295044, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6429942418426103, |
|
"grad_norm": 12.732993242920942, |
|
"learning_rate": 1.7028067918809535e-07, |
|
"logits/chosen": -0.6443219184875488, |
|
"logits/rejected": -0.6607564687728882, |
|
"logps/chosen": -408.63385009765625, |
|
"logps/rejected": -678.4671630859375, |
|
"loss": 0.4429, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9784246683120728, |
|
"rewards/margins": 2.1958975791931152, |
|
"rewards/rejected": -4.174322605133057, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6453934740882917, |
|
"grad_norm": 16.43098937212258, |
|
"learning_rate": 1.6829896827090584e-07, |
|
"logits/chosen": -0.6939103007316589, |
|
"logits/rejected": -0.6823415756225586, |
|
"logps/chosen": -444.9854431152344, |
|
"logps/rejected": -528.9821166992188, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0313303470611572, |
|
"rewards/margins": 1.1062291860580444, |
|
"rewards/rejected": -3.137559652328491, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6477927063339731, |
|
"grad_norm": 9.363672145947863, |
|
"learning_rate": 1.6632298835649844e-07, |
|
"logits/chosen": -0.5836836099624634, |
|
"logits/rejected": -0.5799709558486938, |
|
"logps/chosen": -470.08197021484375, |
|
"logps/rejected": -686.9669799804688, |
|
"loss": 0.4069, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0098516941070557, |
|
"rewards/margins": 1.7816604375839233, |
|
"rewards/rejected": -3.7915122509002686, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6501919385796545, |
|
"grad_norm": 17.2324020009346, |
|
"learning_rate": 1.6435287805201364e-07, |
|
"logits/chosen": -0.5617779493331909, |
|
"logits/rejected": -0.5524694919586182, |
|
"logps/chosen": -467.46075439453125, |
|
"logps/rejected": -561.6852416992188, |
|
"loss": 0.4668, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1388306617736816, |
|
"rewards/margins": 1.0130404233932495, |
|
"rewards/rejected": -3.1518714427948, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 12.016483428799015, |
|
"learning_rate": 1.6238877555286207e-07, |
|
"logits/chosen": -0.6310284733772278, |
|
"logits/rejected": -0.6076905727386475, |
|
"logps/chosen": -451.049560546875, |
|
"logps/rejected": -625.4998779296875, |
|
"loss": 0.4317, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.715829849243164, |
|
"rewards/margins": 1.5645663738250732, |
|
"rewards/rejected": -3.280395984649658, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6549904030710173, |
|
"grad_norm": 13.942118191847904, |
|
"learning_rate": 1.60430818633031e-07, |
|
"logits/chosen": -0.6367970705032349, |
|
"logits/rejected": -0.6443176865577698, |
|
"logps/chosen": -442.45428466796875, |
|
"logps/rejected": -603.2288818359375, |
|
"loss": 0.4292, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.833968162536621, |
|
"rewards/margins": 1.6198402643203735, |
|
"rewards/rejected": -3.453808546066284, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6573896353166987, |
|
"grad_norm": 12.960755123491403, |
|
"learning_rate": 1.5847914463541939e-07, |
|
"logits/chosen": -0.6094954013824463, |
|
"logits/rejected": -0.616841197013855, |
|
"logps/chosen": -387.78448486328125, |
|
"logps/rejected": -546.4832763671875, |
|
"loss": 0.4165, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.933266043663025, |
|
"rewards/margins": 1.3234989643096924, |
|
"rewards/rejected": -3.2567648887634277, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6597888675623801, |
|
"grad_norm": 11.216331431155993, |
|
"learning_rate": 1.5653389046220427e-07, |
|
"logits/chosen": -0.558444619178772, |
|
"logits/rejected": -0.5738928318023682, |
|
"logps/chosen": -416.5931091308594, |
|
"logps/rejected": -565.6954345703125, |
|
"loss": 0.4206, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8503406047821045, |
|
"rewards/margins": 1.2912790775299072, |
|
"rewards/rejected": -3.141619920730591, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6621880998080614, |
|
"grad_norm": 14.796243391579123, |
|
"learning_rate": 1.545951925652375e-07, |
|
"logits/chosen": -0.5394322872161865, |
|
"logits/rejected": -0.5567634105682373, |
|
"logps/chosen": -502.76947021484375, |
|
"logps/rejected": -626.8363037109375, |
|
"loss": 0.4198, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.9490293264389038, |
|
"rewards/margins": 1.7876237630844116, |
|
"rewards/rejected": -3.7366535663604736, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6645873320537428, |
|
"grad_norm": 13.413386618717803, |
|
"learning_rate": 1.5266318693647423e-07, |
|
"logits/chosen": -0.5417942404747009, |
|
"logits/rejected": -0.5404913425445557, |
|
"logps/chosen": -455.65264892578125, |
|
"logps/rejected": -571.8460693359375, |
|
"loss": 0.4333, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9460630416870117, |
|
"rewards/margins": 1.299930453300476, |
|
"rewards/rejected": -3.2459938526153564, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6669865642994242, |
|
"grad_norm": 16.515416936082715, |
|
"learning_rate": 1.5073800909843353e-07, |
|
"logits/chosen": -0.5896440744400024, |
|
"logits/rejected": -0.6135233640670776, |
|
"logps/chosen": -448.375732421875, |
|
"logps/rejected": -550.3060302734375, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8330185413360596, |
|
"rewards/margins": 1.5391408205032349, |
|
"rewards/rejected": -3.372159481048584, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6693857965451055, |
|
"grad_norm": 16.63287415870903, |
|
"learning_rate": 1.488197940946922e-07, |
|
"logits/chosen": -0.5957229733467102, |
|
"logits/rejected": -0.5872025489807129, |
|
"logps/chosen": -433.13287353515625, |
|
"logps/rejected": -544.17724609375, |
|
"loss": 0.4116, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5587198734283447, |
|
"rewards/margins": 1.6922566890716553, |
|
"rewards/rejected": -3.250977039337158, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 17.943322401731898, |
|
"learning_rate": 1.4690867648041167e-07, |
|
"logits/chosen": -0.5465134978294373, |
|
"logits/rejected": -0.5925148725509644, |
|
"logps/chosen": -427.1700134277344, |
|
"logps/rejected": -597.7564697265625, |
|
"loss": 0.463, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7293113470077515, |
|
"rewards/margins": 1.9395873546600342, |
|
"rewards/rejected": -3.668898820877075, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6741842610364683, |
|
"grad_norm": 13.110879243171492, |
|
"learning_rate": 1.4500479031289987e-07, |
|
"logits/chosen": -0.5818893313407898, |
|
"logits/rejected": -0.6302607655525208, |
|
"logps/chosen": -445.3763732910156, |
|
"logps/rejected": -590.7268676757812, |
|
"loss": 0.4776, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6836143732070923, |
|
"rewards/margins": 1.5271230936050415, |
|
"rewards/rejected": -3.2107372283935547, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6765834932821497, |
|
"grad_norm": 12.058976212342188, |
|
"learning_rate": 1.4310826914220747e-07, |
|
"logits/chosen": -0.6128555536270142, |
|
"logits/rejected": -0.6189436912536621, |
|
"logps/chosen": -497.07183837890625, |
|
"logps/rejected": -601.4428100585938, |
|
"loss": 0.4442, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8764116764068604, |
|
"rewards/margins": 1.2488231658935547, |
|
"rewards/rejected": -3.125235080718994, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6789827255278311, |
|
"grad_norm": 17.97354363119042, |
|
"learning_rate": 1.412192460017597e-07, |
|
"logits/chosen": -0.6210779547691345, |
|
"logits/rejected": -0.6108576655387878, |
|
"logps/chosen": -455.32391357421875, |
|
"logps/rejected": -592.3138427734375, |
|
"loss": 0.4641, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.1085453033447266, |
|
"rewards/margins": 1.3274444341659546, |
|
"rewards/rejected": -3.4359898567199707, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6813819577735125, |
|
"grad_norm": 11.08581581164283, |
|
"learning_rate": 1.3933785339902504e-07, |
|
"logits/chosen": -0.6120859384536743, |
|
"logits/rejected": -0.5777018666267395, |
|
"logps/chosen": -392.5328674316406, |
|
"logps/rejected": -571.60986328125, |
|
"loss": 0.4874, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9550358057022095, |
|
"rewards/margins": 1.3144079446792603, |
|
"rewards/rejected": -3.2694435119628906, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6837811900191939, |
|
"grad_norm": 11.419675800311689, |
|
"learning_rate": 1.374642233062197e-07, |
|
"logits/chosen": -0.5789315104484558, |
|
"logits/rejected": -0.5940367579460144, |
|
"logps/chosen": -489.6767578125, |
|
"logps/rejected": -603.5349731445312, |
|
"loss": 0.4489, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0280773639678955, |
|
"rewards/margins": 1.4480621814727783, |
|
"rewards/rejected": -3.476139545440674, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6861804222648752, |
|
"grad_norm": 12.571278512714647, |
|
"learning_rate": 1.355984871510511e-07, |
|
"logits/chosen": -0.5551937818527222, |
|
"logits/rejected": -0.526736319065094, |
|
"logps/chosen": -485.8904724121094, |
|
"logps/rejected": -637.25341796875, |
|
"loss": 0.425, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9234853982925415, |
|
"rewards/margins": 1.3963334560394287, |
|
"rewards/rejected": -3.3198189735412598, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6885796545105566, |
|
"grad_norm": 10.614013070716824, |
|
"learning_rate": 1.3374077580749783e-07, |
|
"logits/chosen": -0.576995313167572, |
|
"logits/rejected": -0.5995679497718811, |
|
"logps/chosen": -382.17498779296875, |
|
"logps/rejected": -547.46484375, |
|
"loss": 0.4393, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.897216796875, |
|
"rewards/margins": 1.4525885581970215, |
|
"rewards/rejected": -3.3498051166534424, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 18.09913751507019, |
|
"learning_rate": 1.3189121958663024e-07, |
|
"logits/chosen": -0.5638588666915894, |
|
"logits/rejected": -0.6245552897453308, |
|
"logps/chosen": -522.0628662109375, |
|
"logps/rejected": -575.4417724609375, |
|
"loss": 0.4336, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.396254062652588, |
|
"rewards/margins": 0.8919731974601746, |
|
"rewards/rejected": -3.2882275581359863, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6933781190019194, |
|
"grad_norm": 15.80771289819406, |
|
"learning_rate": 1.3004994822746895e-07, |
|
"logits/chosen": -0.7042198181152344, |
|
"logits/rejected": -0.697306752204895, |
|
"logps/chosen": -436.90386962890625, |
|
"logps/rejected": -577.8507080078125, |
|
"loss": 0.4769, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9693387746810913, |
|
"rewards/margins": 1.3294403553009033, |
|
"rewards/rejected": -3.298779010772705, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6957773512476008, |
|
"grad_norm": 11.587138387929464, |
|
"learning_rate": 1.2821709088788434e-07, |
|
"logits/chosen": -0.5107399821281433, |
|
"logits/rejected": -0.5425523519515991, |
|
"logps/chosen": -380.4593505859375, |
|
"logps/rejected": -541.3460693359375, |
|
"loss": 0.4398, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7053035497665405, |
|
"rewards/margins": 1.585318922996521, |
|
"rewards/rejected": -3.2906222343444824, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6981765834932822, |
|
"grad_norm": 15.327291081862692, |
|
"learning_rate": 1.2639277613553736e-07, |
|
"logits/chosen": -0.5802925825119019, |
|
"logits/rejected": -0.5720899105072021, |
|
"logps/chosen": -379.57318115234375, |
|
"logps/rejected": -496.7958068847656, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8436704874038696, |
|
"rewards/margins": 1.1200520992279053, |
|
"rewards/rejected": -2.9637222290039062, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7005758157389635, |
|
"grad_norm": 11.468692622260464, |
|
"learning_rate": 1.2457713193885975e-07, |
|
"logits/chosen": -0.5771138072013855, |
|
"logits/rejected": -0.5807372331619263, |
|
"logps/chosen": -359.72613525390625, |
|
"logps/rejected": -550.9989013671875, |
|
"loss": 0.4176, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8891617059707642, |
|
"rewards/margins": 1.5464417934417725, |
|
"rewards/rejected": -3.435603618621826, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7029750479846449, |
|
"grad_norm": 17.115987201031448, |
|
"learning_rate": 1.2277028565807838e-07, |
|
"logits/chosen": -0.5637086629867554, |
|
"logits/rejected": -0.584968090057373, |
|
"logps/chosen": -446.92767333984375, |
|
"logps/rejected": -574.7530517578125, |
|
"loss": 0.468, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9414465427398682, |
|
"rewards/margins": 1.3546481132507324, |
|
"rewards/rejected": -3.2960944175720215, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7053742802303263, |
|
"grad_norm": 17.441141869897656, |
|
"learning_rate": 1.209723640362815e-07, |
|
"logits/chosen": -0.5792838335037231, |
|
"logits/rejected": -0.5826687216758728, |
|
"logps/chosen": -456.9283142089844, |
|
"logps/rejected": -620.7843627929688, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8888061046600342, |
|
"rewards/margins": 1.7339365482330322, |
|
"rewards/rejected": -3.6227424144744873, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7077735124760077, |
|
"grad_norm": 11.312151269139996, |
|
"learning_rate": 1.191834931905277e-07, |
|
"logits/chosen": -0.5471521615982056, |
|
"logits/rejected": -0.5616979598999023, |
|
"logps/chosen": -510.0934143066406, |
|
"logps/rejected": -647.6993408203125, |
|
"loss": 0.4134, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.116403341293335, |
|
"rewards/margins": 1.3659651279449463, |
|
"rewards/rejected": -3.482367992401123, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 13.959291203129078, |
|
"learning_rate": 1.1740379860299988e-07, |
|
"logits/chosen": -0.5202777981758118, |
|
"logits/rejected": -0.5581659078598022, |
|
"logps/chosen": -476.20635986328125, |
|
"logps/rejected": -612.1878662109375, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9924306869506836, |
|
"rewards/margins": 1.1773386001586914, |
|
"rewards/rejected": -3.169769287109375, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7125719769673704, |
|
"grad_norm": 13.163034374925202, |
|
"learning_rate": 1.1563340511220254e-07, |
|
"logits/chosen": -0.5559359788894653, |
|
"logits/rejected": -0.5668517351150513, |
|
"logps/chosen": -511.17333984375, |
|
"logps/rejected": -623.8465576171875, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2064290046691895, |
|
"rewards/margins": 1.2941501140594482, |
|
"rewards/rejected": -3.5005791187286377, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7149712092130518, |
|
"grad_norm": 12.726063519299634, |
|
"learning_rate": 1.1387243690420556e-07, |
|
"logits/chosen": -0.5109056234359741, |
|
"logits/rejected": -0.5152195692062378, |
|
"logps/chosen": -488.728759765625, |
|
"logps/rejected": -659.3638305664062, |
|
"loss": 0.4383, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.865386724472046, |
|
"rewards/margins": 1.8146740198135376, |
|
"rewards/rejected": -3.680060863494873, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7173704414587332, |
|
"grad_norm": 15.060517596878292, |
|
"learning_rate": 1.1212101750393235e-07, |
|
"logits/chosen": -0.5651146173477173, |
|
"logits/rejected": -0.5633836984634399, |
|
"logps/chosen": -457.1798400878906, |
|
"logps/rejected": -594.5474853515625, |
|
"loss": 0.4016, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1187145709991455, |
|
"rewards/margins": 1.5976879596710205, |
|
"rewards/rejected": -3.716402530670166, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7197696737044146, |
|
"grad_norm": 10.840848096813268, |
|
"learning_rate": 1.1037926976649562e-07, |
|
"logits/chosen": -0.6062291860580444, |
|
"logits/rejected": -0.5924742817878723, |
|
"logps/chosen": -467.4085388183594, |
|
"logps/rejected": -654.9591674804688, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1459498405456543, |
|
"rewards/margins": 1.5907325744628906, |
|
"rewards/rejected": -3.736682415008545, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.722168905950096, |
|
"grad_norm": 15.042187958894935, |
|
"learning_rate": 1.0864731586857936e-07, |
|
"logits/chosen": -0.4485263228416443, |
|
"logits/rejected": -0.44067448377609253, |
|
"logps/chosen": -470.13055419921875, |
|
"logps/rejected": -602.8482666015625, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.9293149709701538, |
|
"rewards/margins": 1.6746339797973633, |
|
"rewards/rejected": -3.6039490699768066, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7245681381957774, |
|
"grad_norm": 12.194246416479364, |
|
"learning_rate": 1.0692527729986839e-07, |
|
"logits/chosen": -0.5851191282272339, |
|
"logits/rejected": -0.592607855796814, |
|
"logps/chosen": -451.42376708984375, |
|
"logps/rejected": -582.010498046875, |
|
"loss": 0.3994, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9533573389053345, |
|
"rewards/margins": 1.4484539031982422, |
|
"rewards/rejected": -3.401811122894287, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7269673704414588, |
|
"grad_norm": 14.340146970439987, |
|
"learning_rate": 1.0521327485452692e-07, |
|
"logits/chosen": -0.5049649477005005, |
|
"logits/rejected": -0.5048503875732422, |
|
"logps/chosen": -444.41015625, |
|
"logps/rejected": -574.8997802734375, |
|
"loss": 0.4438, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.013583183288574, |
|
"rewards/margins": 1.4341424703598022, |
|
"rewards/rejected": -3.447725296020508, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 17.094188348902286, |
|
"learning_rate": 1.0351142862272468e-07, |
|
"logits/chosen": -0.4877733290195465, |
|
"logits/rejected": -0.542160153388977, |
|
"logps/chosen": -409.5499267578125, |
|
"logps/rejected": -598.6688232421875, |
|
"loss": 0.4483, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.956335425376892, |
|
"rewards/margins": 1.96954345703125, |
|
"rewards/rejected": -3.9258790016174316, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7317658349328215, |
|
"grad_norm": 16.384363468476412, |
|
"learning_rate": 1.0181985798221343e-07, |
|
"logits/chosen": -0.44771862030029297, |
|
"logits/rejected": -0.48205646872520447, |
|
"logps/chosen": -469.80364990234375, |
|
"logps/rejected": -642.3453979492188, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.060243844985962, |
|
"rewards/margins": 1.60220468044281, |
|
"rewards/rejected": -3.6624481678009033, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7341650671785028, |
|
"grad_norm": 15.159551813260798, |
|
"learning_rate": 1.0013868158995329e-07, |
|
"logits/chosen": -0.4246044158935547, |
|
"logits/rejected": -0.45489010214805603, |
|
"logps/chosen": -471.446533203125, |
|
"logps/rejected": -587.3192138671875, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.233337879180908, |
|
"rewards/margins": 1.3818399906158447, |
|
"rewards/rejected": -3.615177869796753, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7365642994241842, |
|
"grad_norm": 13.463524340329029, |
|
"learning_rate": 9.84680173737887e-08, |
|
"logits/chosen": -0.5496365427970886, |
|
"logits/rejected": -0.569757878780365, |
|
"logps/chosen": -493.57843017578125, |
|
"logps/rejected": -592.6453247070312, |
|
"loss": 0.4307, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.2892556190490723, |
|
"rewards/margins": 1.3507412672042847, |
|
"rewards/rejected": -3.6399970054626465, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7389635316698656, |
|
"grad_norm": 12.455106651157736, |
|
"learning_rate": 9.680798252417713e-08, |
|
"logits/chosen": -0.5762359499931335, |
|
"logits/rejected": -0.5943504571914673, |
|
"logps/chosen": -403.1676025390625, |
|
"logps/rejected": -588.2984619140625, |
|
"loss": 0.4171, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.9747021198272705, |
|
"rewards/margins": 1.486598014831543, |
|
"rewards/rejected": -3.4612998962402344, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.741362763915547, |
|
"grad_norm": 14.204008950257876, |
|
"learning_rate": 9.515869348596808e-08, |
|
"logits/chosen": -0.5742790699005127, |
|
"logits/rejected": -0.6299481987953186, |
|
"logps/chosen": -498.3915100097656, |
|
"logps/rejected": -616.3846435546875, |
|
"loss": 0.453, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0285322666168213, |
|
"rewards/margins": 1.541265845298767, |
|
"rewards/rejected": -3.569798231124878, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7437619961612284, |
|
"grad_norm": 11.674082631607327, |
|
"learning_rate": 9.352026595023493e-08, |
|
"logits/chosen": -0.6226130723953247, |
|
"logits/rejected": -0.6168379783630371, |
|
"logps/chosen": -476.732666015625, |
|
"logps/rejected": -552.8656005859375, |
|
"loss": 0.4441, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.860538125038147, |
|
"rewards/margins": 1.0402759313583374, |
|
"rewards/rejected": -2.9008140563964844, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7461612284069098, |
|
"grad_norm": 13.589284320232862, |
|
"learning_rate": 9.189281484616004e-08, |
|
"logits/chosen": -0.5403670072555542, |
|
"logits/rejected": -0.5388067960739136, |
|
"logps/chosen": -406.925048828125, |
|
"logps/rejected": -575.8323974609375, |
|
"loss": 0.4779, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.1276307106018066, |
|
"rewards/margins": 1.1783473491668701, |
|
"rewards/rejected": -3.3059780597686768, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 14.083306203022303, |
|
"learning_rate": 9.027645433297249e-08, |
|
"logits/chosen": -0.5009843707084656, |
|
"logits/rejected": -0.5172004699707031, |
|
"logps/chosen": -545.912109375, |
|
"logps/rejected": -643.9588012695312, |
|
"loss": 0.4964, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.4063868522644043, |
|
"rewards/margins": 1.3331215381622314, |
|
"rewards/rejected": -3.7395083904266357, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7509596928982726, |
|
"grad_norm": 15.054060697910124, |
|
"learning_rate": 8.867129779194066e-08, |
|
"logits/chosen": -0.6103423833847046, |
|
"logits/rejected": -0.6305662393569946, |
|
"logps/chosen": -362.4765930175781, |
|
"logps/rejected": -533.6757202148438, |
|
"loss": 0.4347, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5669844150543213, |
|
"rewards/margins": 1.7321140766143799, |
|
"rewards/rejected": -3.2990989685058594, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.753358925143954, |
|
"grad_norm": 15.171972092803582, |
|
"learning_rate": 8.707745781841866e-08, |
|
"logits/chosen": -0.5541486144065857, |
|
"logits/rejected": -0.5569981932640076, |
|
"logps/chosen": -399.5630187988281, |
|
"logps/rejected": -563.7500610351562, |
|
"loss": 0.4655, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.850064992904663, |
|
"rewards/margins": 1.5996869802474976, |
|
"rewards/rejected": -3.44975209236145, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7557581573896354, |
|
"grad_norm": 7.987545135931887, |
|
"learning_rate": 8.549504621394831e-08, |
|
"logits/chosen": -0.6387466192245483, |
|
"logits/rejected": -0.629570484161377, |
|
"logps/chosen": -387.7325134277344, |
|
"logps/rejected": -584.460693359375, |
|
"loss": 0.3641, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.571352243423462, |
|
"rewards/margins": 1.9252641201019287, |
|
"rewards/rejected": -3.4966163635253906, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7581573896353166, |
|
"grad_norm": 13.9921540642964, |
|
"learning_rate": 8.392417397841703e-08, |
|
"logits/chosen": -0.5311389565467834, |
|
"logits/rejected": -0.563139796257019, |
|
"logps/chosen": -412.6419982910156, |
|
"logps/rejected": -557.9671630859375, |
|
"loss": 0.4553, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.664912223815918, |
|
"rewards/margins": 1.2914505004882812, |
|
"rewards/rejected": -2.956362724304199, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.760556621880998, |
|
"grad_norm": 10.54509313617014, |
|
"learning_rate": 8.236495130227083e-08, |
|
"logits/chosen": -0.507122278213501, |
|
"logits/rejected": -0.5326481461524963, |
|
"logps/chosen": -449.63214111328125, |
|
"logps/rejected": -611.7938232421875, |
|
"loss": 0.4536, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.660041093826294, |
|
"rewards/margins": 1.9526357650756836, |
|
"rewards/rejected": -3.6126770973205566, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7629558541266794, |
|
"grad_norm": 19.060642084344988, |
|
"learning_rate": 8.081748755878612e-08, |
|
"logits/chosen": -0.5622953176498413, |
|
"logits/rejected": -0.6007119417190552, |
|
"logps/chosen": -473.05029296875, |
|
"logps/rejected": -541.9755249023438, |
|
"loss": 0.4346, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0736916065216064, |
|
"rewards/margins": 1.2077754735946655, |
|
"rewards/rejected": -3.2814669609069824, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7653550863723608, |
|
"grad_norm": 12.641909967755108, |
|
"learning_rate": 7.928189129639632e-08, |
|
"logits/chosen": -0.5046022534370422, |
|
"logits/rejected": -0.4738716185092926, |
|
"logps/chosen": -424.58990478515625, |
|
"logps/rejected": -587.5633544921875, |
|
"loss": 0.4025, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.909470796585083, |
|
"rewards/margins": 1.5023930072784424, |
|
"rewards/rejected": -3.411863327026367, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 17.284721871893858, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": -0.5278437733650208, |
|
"logits/rejected": -0.5451136827468872, |
|
"logps/chosen": -438.02001953125, |
|
"logps/rejected": -594.689453125, |
|
"loss": 0.4761, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9781577587127686, |
|
"rewards/margins": 1.3622440099716187, |
|
"rewards/rejected": -3.340402126312256, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7701535508637236, |
|
"grad_norm": 16.225895751875765, |
|
"learning_rate": 7.624673123879682e-08, |
|
"logits/chosen": -0.6049574017524719, |
|
"logits/rejected": -0.6234583854675293, |
|
"logps/chosen": -408.36663818359375, |
|
"logps/rejected": -536.5324096679688, |
|
"loss": 0.4467, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7185170650482178, |
|
"rewards/margins": 1.4202756881713867, |
|
"rewards/rejected": -3.1387927532196045, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.772552783109405, |
|
"grad_norm": 11.145879658096279, |
|
"learning_rate": 7.474738034800663e-08, |
|
"logits/chosen": -0.6624782085418701, |
|
"logits/rejected": -0.6585075259208679, |
|
"logps/chosen": -374.86956787109375, |
|
"logps/rejected": -557.4089965820312, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.858591079711914, |
|
"rewards/margins": 1.8989025354385376, |
|
"rewards/rejected": -3.7574939727783203, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7749520153550864, |
|
"grad_norm": 12.542053839469856, |
|
"learning_rate": 7.326032273221606e-08, |
|
"logits/chosen": -0.5727890133857727, |
|
"logits/rejected": -0.5879526138305664, |
|
"logps/chosen": -486.57806396484375, |
|
"logps/rejected": -597.93115234375, |
|
"loss": 0.41, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.0051982402801514, |
|
"rewards/margins": 1.297196626663208, |
|
"rewards/rejected": -3.3023948669433594, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7773512476007678, |
|
"grad_norm": 14.33501525091917, |
|
"learning_rate": 7.178566270260872e-08, |
|
"logits/chosen": -0.5633407235145569, |
|
"logits/rejected": -0.5676406621932983, |
|
"logps/chosen": -460.4888610839844, |
|
"logps/rejected": -608.435791015625, |
|
"loss": 0.4768, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.092958927154541, |
|
"rewards/margins": 1.1691919565200806, |
|
"rewards/rejected": -3.262151002883911, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7797504798464492, |
|
"grad_norm": 12.819177195288049, |
|
"learning_rate": 7.032350370072709e-08, |
|
"logits/chosen": -0.5324596166610718, |
|
"logits/rejected": -0.5585157871246338, |
|
"logps/chosen": -442.884765625, |
|
"logps/rejected": -604.0455932617188, |
|
"loss": 0.4107, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7358335256576538, |
|
"rewards/margins": 1.707724928855896, |
|
"rewards/rejected": -3.44355845451355, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7821497120921305, |
|
"grad_norm": 12.690278052375158, |
|
"learning_rate": 6.887394829121596e-08, |
|
"logits/chosen": -0.5658280849456787, |
|
"logits/rejected": -0.6075069308280945, |
|
"logps/chosen": -454.86376953125, |
|
"logps/rejected": -674.8836059570312, |
|
"loss": 0.4234, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9878448247909546, |
|
"rewards/margins": 2.3019330501556396, |
|
"rewards/rejected": -4.289777755737305, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7845489443378119, |
|
"grad_norm": 13.632469694204937, |
|
"learning_rate": 6.743709815462833e-08, |
|
"logits/chosen": -0.6113773584365845, |
|
"logits/rejected": -0.62415611743927, |
|
"logps/chosen": -446.94708251953125, |
|
"logps/rejected": -559.8250732421875, |
|
"loss": 0.4336, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9104185104370117, |
|
"rewards/margins": 1.419710397720337, |
|
"rewards/rejected": -3.3301289081573486, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 11.823636613615504, |
|
"learning_rate": 6.601305408029287e-08, |
|
"logits/chosen": -0.47927188873291016, |
|
"logits/rejected": -0.48892560601234436, |
|
"logps/chosen": -441.11334228515625, |
|
"logps/rejected": -583.4063720703125, |
|
"loss": 0.4456, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.027625560760498, |
|
"rewards/margins": 1.3957432508468628, |
|
"rewards/rejected": -3.4233689308166504, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7893474088291746, |
|
"grad_norm": 16.011451751603392, |
|
"learning_rate": 6.460191595924366e-08, |
|
"logits/chosen": -0.5301553010940552, |
|
"logits/rejected": -0.5367687940597534, |
|
"logps/chosen": -456.830322265625, |
|
"logps/rejected": -578.0042724609375, |
|
"loss": 0.4282, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9470264911651611, |
|
"rewards/margins": 1.198061227798462, |
|
"rewards/rejected": -3.145087718963623, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.791746641074856, |
|
"grad_norm": 12.691724390773901, |
|
"learning_rate": 6.320378277721342e-08, |
|
"logits/chosen": -0.496354877948761, |
|
"logits/rejected": -0.5095658898353577, |
|
"logps/chosen": -461.4742126464844, |
|
"logps/rejected": -546.1395874023438, |
|
"loss": 0.452, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.140702724456787, |
|
"rewards/margins": 0.8940545320510864, |
|
"rewards/rejected": -3.034757137298584, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7941458733205374, |
|
"grad_norm": 16.823392530566938, |
|
"learning_rate": 6.181875260769032e-08, |
|
"logits/chosen": -0.560473620891571, |
|
"logits/rejected": -0.5936623811721802, |
|
"logps/chosen": -451.93463134765625, |
|
"logps/rejected": -540.117431640625, |
|
"loss": 0.4733, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6345758438110352, |
|
"rewards/margins": 1.5807870626449585, |
|
"rewards/rejected": -3.215363025665283, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7965451055662188, |
|
"grad_norm": 14.520894739599417, |
|
"learning_rate": 6.044692260503797e-08, |
|
"logits/chosen": -0.5175925493240356, |
|
"logits/rejected": -0.5312203764915466, |
|
"logps/chosen": -503.57965087890625, |
|
"logps/rejected": -643.9119873046875, |
|
"loss": 0.3855, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1000008583068848, |
|
"rewards/margins": 1.6387672424316406, |
|
"rewards/rejected": -3.7387681007385254, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7989443378119002, |
|
"grad_norm": 14.229849309645635, |
|
"learning_rate": 5.9088388997680984e-08, |
|
"logits/chosen": -0.5600963830947876, |
|
"logits/rejected": -0.5659655332565308, |
|
"logps/chosen": -531.041259765625, |
|
"logps/rejected": -604.8890380859375, |
|
"loss": 0.4186, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.036707639694214, |
|
"rewards/margins": 1.4657360315322876, |
|
"rewards/rejected": -3.502443313598633, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8013435700575816, |
|
"grad_norm": 14.73962835970847, |
|
"learning_rate": 5.774324708135439e-08, |
|
"logits/chosen": -0.6263202428817749, |
|
"logits/rejected": -0.6441248059272766, |
|
"logps/chosen": -392.24835205078125, |
|
"logps/rejected": -516.92236328125, |
|
"loss": 0.4425, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7824525833129883, |
|
"rewards/margins": 1.451863169670105, |
|
"rewards/rejected": -3.2343153953552246, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.803742802303263, |
|
"grad_norm": 10.763534598750553, |
|
"learning_rate": 5.641159121241953e-08, |
|
"logits/chosen": -0.5910140872001648, |
|
"logits/rejected": -0.5627475380897522, |
|
"logps/chosen": -398.29547119140625, |
|
"logps/rejected": -605.0794067382812, |
|
"loss": 0.453, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8737194538116455, |
|
"rewards/margins": 1.7353298664093018, |
|
"rewards/rejected": -3.609048366546631, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 13.4623185493143, |
|
"learning_rate": 5.5093514801245106e-08, |
|
"logits/chosen": -0.497117817401886, |
|
"logits/rejected": -0.5237521529197693, |
|
"logps/chosen": -448.5010681152344, |
|
"logps/rejected": -618.0789794921875, |
|
"loss": 0.4387, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0547759532928467, |
|
"rewards/margins": 1.4181969165802002, |
|
"rewards/rejected": -3.472972869873047, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8085412667946257, |
|
"grad_norm": 13.694316296216712, |
|
"learning_rate": 5.378911030565453e-08, |
|
"logits/chosen": -0.44851231575012207, |
|
"logits/rejected": -0.4383707046508789, |
|
"logps/chosen": -530.1419677734375, |
|
"logps/rejected": -675.80224609375, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4019455909729004, |
|
"rewards/margins": 1.1452901363372803, |
|
"rewards/rejected": -3.5472354888916016, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8109404990403071, |
|
"grad_norm": 10.528600910072944, |
|
"learning_rate": 5.249846922444101e-08, |
|
"logits/chosen": -0.5667535066604614, |
|
"logits/rejected": -0.5957349538803101, |
|
"logps/chosen": -412.9205017089844, |
|
"logps/rejected": -626.1097412109375, |
|
"loss": 0.4203, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.0233278274536133, |
|
"rewards/margins": 2.25299334526062, |
|
"rewards/rejected": -4.276320934295654, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8133397312859885, |
|
"grad_norm": 14.951755734068287, |
|
"learning_rate": 5.122168209094865e-08, |
|
"logits/chosen": -0.49277129769325256, |
|
"logits/rejected": -0.5049806833267212, |
|
"logps/chosen": -425.42291259765625, |
|
"logps/rejected": -525.0523071289062, |
|
"loss": 0.4278, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1365087032318115, |
|
"rewards/margins": 1.0094716548919678, |
|
"rewards/rejected": -3.1459803581237793, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8157389635316699, |
|
"grad_norm": 10.733513837258595, |
|
"learning_rate": 4.995883846672222e-08, |
|
"logits/chosen": -0.5534166693687439, |
|
"logits/rejected": -0.5597847700119019, |
|
"logps/chosen": -573.193115234375, |
|
"logps/rejected": -633.9830322265625, |
|
"loss": 0.4259, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0612425804138184, |
|
"rewards/margins": 1.392027735710144, |
|
"rewards/rejected": -3.453270435333252, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8181381957773513, |
|
"grad_norm": 14.785275906962475, |
|
"learning_rate": 4.871002693522486e-08, |
|
"logits/chosen": -0.601963222026825, |
|
"logits/rejected": -0.6202664971351624, |
|
"logps/chosen": -458.74700927734375, |
|
"logps/rejected": -539.7118530273438, |
|
"loss": 0.4571, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.000706434249878, |
|
"rewards/margins": 1.0372394323349, |
|
"rewards/rejected": -3.0379462242126465, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8205374280230326, |
|
"grad_norm": 10.774672959999748, |
|
"learning_rate": 4.7475335095623956e-08, |
|
"logits/chosen": -0.5421626567840576, |
|
"logits/rejected": -0.5486319661140442, |
|
"logps/chosen": -448.543212890625, |
|
"logps/rejected": -592.2151489257812, |
|
"loss": 0.4437, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.940687894821167, |
|
"rewards/margins": 1.5618858337402344, |
|
"rewards/rejected": -3.5025742053985596, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.822936660268714, |
|
"grad_norm": 18.59487101118735, |
|
"learning_rate": 4.6254849556646714e-08, |
|
"logits/chosen": -0.4963017404079437, |
|
"logits/rejected": -0.4984667897224426, |
|
"logps/chosen": -487.8688049316406, |
|
"logps/rejected": -633.2571411132812, |
|
"loss": 0.4242, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9285959005355835, |
|
"rewards/margins": 1.74213445186615, |
|
"rewards/rejected": -3.6707305908203125, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 14.44038111770707, |
|
"learning_rate": 4.504865593050483e-08, |
|
"logits/chosen": -0.5637535452842712, |
|
"logits/rejected": -0.5861309170722961, |
|
"logps/chosen": -481.045654296875, |
|
"logps/rejected": -599.7122192382812, |
|
"loss": 0.4611, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.163588047027588, |
|
"rewards/margins": 1.1554136276245117, |
|
"rewards/rejected": -3.3190014362335205, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8277351247600768, |
|
"grad_norm": 15.606897326164194, |
|
"learning_rate": 4.385683882688895e-08, |
|
"logits/chosen": -0.5581148862838745, |
|
"logits/rejected": -0.5503061413764954, |
|
"logps/chosen": -502.58038330078125, |
|
"logps/rejected": -550.5048828125, |
|
"loss": 0.5052, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2417547702789307, |
|
"rewards/margins": 0.969860851764679, |
|
"rewards/rejected": -3.2116153240203857, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8301343570057581, |
|
"grad_norm": 14.412578218384434, |
|
"learning_rate": 4.2679481847033985e-08, |
|
"logits/chosen": -0.5228904485702515, |
|
"logits/rejected": -0.5363970994949341, |
|
"logps/chosen": -470.89544677734375, |
|
"logps/rejected": -635.0361328125, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.150740623474121, |
|
"rewards/margins": 1.5161142349243164, |
|
"rewards/rejected": -3.6668548583984375, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8325335892514395, |
|
"grad_norm": 12.138527821387553, |
|
"learning_rate": 4.151666757785435e-08, |
|
"logits/chosen": -0.6146914958953857, |
|
"logits/rejected": -0.6073625087738037, |
|
"logps/chosen": -406.7055969238281, |
|
"logps/rejected": -636.0123291015625, |
|
"loss": 0.4199, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.6833385229110718, |
|
"rewards/margins": 2.205997943878174, |
|
"rewards/rejected": -3.889336347579956, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8349328214971209, |
|
"grad_norm": 14.790830386193752, |
|
"learning_rate": 4.036847758615136e-08, |
|
"logits/chosen": -0.4863740801811218, |
|
"logits/rejected": -0.5530039668083191, |
|
"logps/chosen": -498.92742919921875, |
|
"logps/rejected": -627.8193359375, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.640145778656006, |
|
"rewards/margins": 1.157325029373169, |
|
"rewards/rejected": -3.797470808029175, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8373320537428023, |
|
"grad_norm": 10.798032597651305, |
|
"learning_rate": 3.923499241289113e-08, |
|
"logits/chosen": -0.6010452508926392, |
|
"logits/rejected": -0.6536823511123657, |
|
"logps/chosen": -528.5853881835938, |
|
"logps/rejected": -596.8575439453125, |
|
"loss": 0.4873, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2441864013671875, |
|
"rewards/margins": 1.2915928363800049, |
|
"rewards/rejected": -3.5357794761657715, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8397312859884837, |
|
"grad_norm": 10.865503254373744, |
|
"learning_rate": 3.811629156755541e-08, |
|
"logits/chosen": -0.5816788077354431, |
|
"logits/rejected": -0.6040675640106201, |
|
"logps/chosen": -496.4483947753906, |
|
"logps/rejected": -629.3803100585938, |
|
"loss": 0.4407, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0880205631256104, |
|
"rewards/margins": 1.3838069438934326, |
|
"rewards/rejected": -3.471827268600464, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8421305182341651, |
|
"grad_norm": 11.278652918262004, |
|
"learning_rate": 3.701245352256391e-08, |
|
"logits/chosen": -0.5675192475318909, |
|
"logits/rejected": -0.6156761050224304, |
|
"logps/chosen": -493.8138732910156, |
|
"logps/rejected": -581.7841796875, |
|
"loss": 0.449, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.045945882797241, |
|
"rewards/margins": 1.155053973197937, |
|
"rewards/rejected": -3.2009997367858887, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 13.271574282231718, |
|
"learning_rate": 3.592355570776984e-08, |
|
"logits/chosen": -0.6514331102371216, |
|
"logits/rejected": -0.652426540851593, |
|
"logps/chosen": -390.1234436035156, |
|
"logps/rejected": -560.892822265625, |
|
"loss": 0.4027, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7292102575302124, |
|
"rewards/margins": 1.5589096546173096, |
|
"rewards/rejected": -3.2881197929382324, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8469289827255279, |
|
"grad_norm": 12.264631757796657, |
|
"learning_rate": 3.484967450502904e-08, |
|
"logits/chosen": -0.5596613883972168, |
|
"logits/rejected": -0.5722562670707703, |
|
"logps/chosen": -371.233154296875, |
|
"logps/rejected": -591.3458862304688, |
|
"loss": 0.4278, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6561084985733032, |
|
"rewards/margins": 1.7522211074829102, |
|
"rewards/rejected": -3.408329725265503, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8493282149712092, |
|
"grad_norm": 21.707760756412107, |
|
"learning_rate": 3.3790885242841296e-08, |
|
"logits/chosen": -0.5783206224441528, |
|
"logits/rejected": -0.6104044318199158, |
|
"logps/chosen": -453.5904235839844, |
|
"logps/rejected": -656.5443115234375, |
|
"loss": 0.4022, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1034021377563477, |
|
"rewards/margins": 2.0261425971984863, |
|
"rewards/rejected": -4.129544258117676, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8517274472168906, |
|
"grad_norm": 17.547573796246525, |
|
"learning_rate": 3.274726219106677e-08, |
|
"logits/chosen": -0.5989304780960083, |
|
"logits/rejected": -0.6391880512237549, |
|
"logps/chosen": -502.286865234375, |
|
"logps/rejected": -645.5471801757812, |
|
"loss": 0.465, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.106433868408203, |
|
"rewards/margins": 1.479160189628601, |
|
"rewards/rejected": -3.5855941772460938, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8541266794625719, |
|
"grad_norm": 12.613105176272517, |
|
"learning_rate": 3.171887855571642e-08, |
|
"logits/chosen": -0.5413884520530701, |
|
"logits/rejected": -0.4987201690673828, |
|
"logps/chosen": -401.0578918457031, |
|
"logps/rejected": -514.5476684570312, |
|
"loss": 0.4373, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.801924467086792, |
|
"rewards/margins": 1.2051035165786743, |
|
"rewards/rejected": -3.007028102874756, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8565259117082533, |
|
"grad_norm": 14.359058138666166, |
|
"learning_rate": 3.070580647381643e-08, |
|
"logits/chosen": -0.5530554056167603, |
|
"logits/rejected": -0.5961068272590637, |
|
"logps/chosen": -413.8770446777344, |
|
"logps/rejected": -572.0676879882812, |
|
"loss": 0.4628, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.856571912765503, |
|
"rewards/margins": 1.5779253244400024, |
|
"rewards/rejected": -3.434497356414795, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8589251439539347, |
|
"grad_norm": 13.165867775542413, |
|
"learning_rate": 2.9708117008348576e-08, |
|
"logits/chosen": -0.5645931959152222, |
|
"logits/rejected": -0.5597985982894897, |
|
"logps/chosen": -502.2027893066406, |
|
"logps/rejected": -563.4842529296875, |
|
"loss": 0.4342, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9971221685409546, |
|
"rewards/margins": 1.1243679523468018, |
|
"rewards/rejected": -3.121490240097046, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8613243761996161, |
|
"grad_norm": 12.288962805449156, |
|
"learning_rate": 2.8725880143264992e-08, |
|
"logits/chosen": -0.6008769869804382, |
|
"logits/rejected": -0.6012517213821411, |
|
"logps/chosen": -473.57110595703125, |
|
"logps/rejected": -637.85546875, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.3291287422180176, |
|
"rewards/margins": 1.2044109106063843, |
|
"rewards/rejected": -3.5335395336151123, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 20.135961770884553, |
|
"learning_rate": 2.775916477857948e-08, |
|
"logits/chosen": -0.5295973420143127, |
|
"logits/rejected": -0.5415645837783813, |
|
"logps/chosen": -432.3699645996094, |
|
"logps/rejected": -547.469970703125, |
|
"loss": 0.4302, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.17122220993042, |
|
"rewards/margins": 1.1380698680877686, |
|
"rewards/rejected": -3.3092925548553467, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8661228406909789, |
|
"grad_norm": 14.482337367851747, |
|
"learning_rate": 2.680803872553408e-08, |
|
"logits/chosen": -0.5756790637969971, |
|
"logits/rejected": -0.651614248752594, |
|
"logps/chosen": -415.51275634765625, |
|
"logps/rejected": -632.2459716796875, |
|
"loss": 0.4528, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7809759378433228, |
|
"rewards/margins": 2.2642104625701904, |
|
"rewards/rejected": -4.045186519622803, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8685220729366603, |
|
"grad_norm": 17.202988883981018, |
|
"learning_rate": 2.5872568701842706e-08, |
|
"logits/chosen": -0.5497530698776245, |
|
"logits/rejected": -0.566763162612915, |
|
"logps/chosen": -387.01171875, |
|
"logps/rejected": -564.3225708007812, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8862674236297607, |
|
"rewards/margins": 1.5258787870407104, |
|
"rewards/rejected": -3.4121460914611816, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8709213051823417, |
|
"grad_norm": 16.587596939503936, |
|
"learning_rate": 2.495282032701096e-08, |
|
"logits/chosen": -0.5850919485092163, |
|
"logits/rejected": -0.6418455839157104, |
|
"logps/chosen": -336.73431396484375, |
|
"logps/rejected": -482.5956115722656, |
|
"loss": 0.4306, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7132467031478882, |
|
"rewards/margins": 1.6492410898208618, |
|
"rewards/rejected": -3.36248779296875, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8733205374280231, |
|
"grad_norm": 17.480004730447185, |
|
"learning_rate": 2.4048858117733133e-08, |
|
"logits/chosen": -0.6369383931159973, |
|
"logits/rejected": -0.6253448724746704, |
|
"logps/chosen": -445.79388427734375, |
|
"logps/rejected": -605.3549194335938, |
|
"loss": 0.4271, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9298584461212158, |
|
"rewards/margins": 2.020317554473877, |
|
"rewards/rejected": -3.9501757621765137, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8757197696737045, |
|
"grad_norm": 15.836939219932262, |
|
"learning_rate": 2.3160745483366938e-08, |
|
"logits/chosen": -0.5421168208122253, |
|
"logits/rejected": -0.5399103760719299, |
|
"logps/chosen": -435.59417724609375, |
|
"logps/rejected": -604.5552978515625, |
|
"loss": 0.432, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.094374179840088, |
|
"rewards/margins": 1.313892126083374, |
|
"rewards/rejected": -3.408266067504883, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8781190019193857, |
|
"grad_norm": 15.232342004495118, |
|
"learning_rate": 2.2288544721485197e-08, |
|
"logits/chosen": -0.6265963912010193, |
|
"logits/rejected": -0.6633044481277466, |
|
"logps/chosen": -367.7839660644531, |
|
"logps/rejected": -573.351318359375, |
|
"loss": 0.4105, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6053600311279297, |
|
"rewards/margins": 1.8461459875106812, |
|
"rewards/rejected": -3.4515061378479004, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8805182341650671, |
|
"grad_norm": 14.172769513689536, |
|
"learning_rate": 2.1432317013506117e-08, |
|
"logits/chosen": -0.65406334400177, |
|
"logits/rejected": -0.6587377190589905, |
|
"logps/chosen": -456.6194763183594, |
|
"logps/rejected": -549.1373291015625, |
|
"loss": 0.4751, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.102041721343994, |
|
"rewards/margins": 1.3290612697601318, |
|
"rewards/rejected": -3.431102752685547, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 14.701070733746729, |
|
"learning_rate": 2.0592122420401704e-08, |
|
"logits/chosen": -0.4700722098350525, |
|
"logits/rejected": -0.46845799684524536, |
|
"logps/chosen": -396.937255859375, |
|
"logps/rejected": -518.0514526367188, |
|
"loss": 0.4567, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.834855318069458, |
|
"rewards/margins": 1.0623095035552979, |
|
"rewards/rejected": -2.8971645832061768, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8853166986564299, |
|
"grad_norm": 12.846048547846815, |
|
"learning_rate": 1.976801987848459e-08, |
|
"logits/chosen": -0.6387890577316284, |
|
"logits/rejected": -0.6454359292984009, |
|
"logps/chosen": -442.06005859375, |
|
"logps/rejected": -623.1151733398438, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8863179683685303, |
|
"rewards/margins": 1.654547095298767, |
|
"rewards/rejected": -3.540865421295166, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8877159309021113, |
|
"grad_norm": 13.872418259324444, |
|
"learning_rate": 1.8960067195273987e-08, |
|
"logits/chosen": -0.6006834506988525, |
|
"logits/rejected": -0.6487979888916016, |
|
"logps/chosen": -395.2511901855469, |
|
"logps/rejected": -565.4385986328125, |
|
"loss": 0.4271, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9122810363769531, |
|
"rewards/margins": 1.7357158660888672, |
|
"rewards/rejected": -3.647996425628662, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8901151631477927, |
|
"grad_norm": 14.265685542528358, |
|
"learning_rate": 1.816832104544072e-08, |
|
"logits/chosen": -0.47106099128723145, |
|
"logits/rejected": -0.5008233189582825, |
|
"logps/chosen": -471.165283203125, |
|
"logps/rejected": -570.4010620117188, |
|
"loss": 0.4474, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.060793876647949, |
|
"rewards/margins": 1.2439154386520386, |
|
"rewards/rejected": -3.3047091960906982, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8925143953934741, |
|
"grad_norm": 12.43063163239937, |
|
"learning_rate": 1.7392836966831553e-08, |
|
"logits/chosen": -0.5043891668319702, |
|
"logits/rejected": -0.541654646396637, |
|
"logps/chosen": -437.30902099609375, |
|
"logps/rejected": -612.6980590820312, |
|
"loss": 0.4117, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.7850959300994873, |
|
"rewards/margins": 2.040393352508545, |
|
"rewards/rejected": -3.825489044189453, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8949136276391555, |
|
"grad_norm": 15.428500593517235, |
|
"learning_rate": 1.663366935657373e-08, |
|
"logits/chosen": -0.6029775738716125, |
|
"logits/rejected": -0.6049376726150513, |
|
"logps/chosen": -399.92132568359375, |
|
"logps/rejected": -564.7398681640625, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9421736001968384, |
|
"rewards/margins": 1.540740966796875, |
|
"rewards/rejected": -3.482914447784424, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8973128598848369, |
|
"grad_norm": 15.901745967879851, |
|
"learning_rate": 1.5890871467258898e-08, |
|
"logits/chosen": -0.5083228945732117, |
|
"logits/rejected": -0.5067955851554871, |
|
"logps/chosen": -519.1453857421875, |
|
"logps/rejected": -613.5746459960938, |
|
"loss": 0.4358, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.022681474685669, |
|
"rewards/margins": 1.2650549411773682, |
|
"rewards/rejected": -3.287736415863037, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8997120921305183, |
|
"grad_norm": 12.288479611102789, |
|
"learning_rate": 1.5164495403207967e-08, |
|
"logits/chosen": -0.6138381958007812, |
|
"logits/rejected": -0.648627758026123, |
|
"logps/chosen": -492.9188537597656, |
|
"logps/rejected": -688.1244506835938, |
|
"loss": 0.4429, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.270721673965454, |
|
"rewards/margins": 1.671618103981018, |
|
"rewards/rejected": -3.9423396587371826, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 13.881705443551352, |
|
"learning_rate": 1.4454592116815962e-08, |
|
"logits/chosen": -0.5159580707550049, |
|
"logits/rejected": -0.5592847466468811, |
|
"logps/chosen": -446.95257568359375, |
|
"logps/rejected": -608.6639404296875, |
|
"loss": 0.4088, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.826229453086853, |
|
"rewards/margins": 1.4294321537017822, |
|
"rewards/rejected": -3.255661725997925, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.904510556621881, |
|
"grad_norm": 9.457402626337869, |
|
"learning_rate": 1.3761211404977934e-08, |
|
"logits/chosen": -0.6075922250747681, |
|
"logits/rejected": -0.6064502596855164, |
|
"logps/chosen": -420.793701171875, |
|
"logps/rejected": -630.1043701171875, |
|
"loss": 0.3875, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.974437952041626, |
|
"rewards/margins": 2.114816427230835, |
|
"rewards/rejected": -4.089253902435303, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9069097888675623, |
|
"grad_norm": 15.273283970687197, |
|
"learning_rate": 1.3084401905596177e-08, |
|
"logits/chosen": -0.5783820152282715, |
|
"logits/rejected": -0.640872597694397, |
|
"logps/chosen": -474.72100830078125, |
|
"logps/rejected": -551.1171875, |
|
"loss": 0.4556, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9363771677017212, |
|
"rewards/margins": 1.1951727867126465, |
|
"rewards/rejected": -3.1315500736236572, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9093090211132437, |
|
"grad_norm": 15.207248331238338, |
|
"learning_rate": 1.2424211094168053e-08, |
|
"logits/chosen": -0.44624510407447815, |
|
"logits/rejected": -0.45766526460647583, |
|
"logps/chosen": -504.5450134277344, |
|
"logps/rejected": -639.1659545898438, |
|
"loss": 0.4296, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8555368185043335, |
|
"rewards/margins": 1.4501540660858154, |
|
"rewards/rejected": -3.3056907653808594, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9117082533589251, |
|
"grad_norm": 12.766030520120715, |
|
"learning_rate": 1.1780685280456143e-08, |
|
"logits/chosen": -0.561526894569397, |
|
"logits/rejected": -0.5872783064842224, |
|
"logps/chosen": -519.5202026367188, |
|
"logps/rejected": -713.41357421875, |
|
"loss": 0.4758, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.273921251296997, |
|
"rewards/margins": 1.8192304372787476, |
|
"rewards/rejected": -4.093151569366455, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9141074856046065, |
|
"grad_norm": 14.582108396435707, |
|
"learning_rate": 1.1153869605239564e-08, |
|
"logits/chosen": -0.5614346265792847, |
|
"logits/rejected": -0.5825585722923279, |
|
"logps/chosen": -472.6640625, |
|
"logps/rejected": -527.7772216796875, |
|
"loss": 0.441, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9882261753082275, |
|
"rewards/margins": 0.9708881378173828, |
|
"rewards/rejected": -2.9591145515441895, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9165067178502879, |
|
"grad_norm": 13.50064006592574, |
|
"learning_rate": 1.0543808037147606e-08, |
|
"logits/chosen": -0.6324799060821533, |
|
"logits/rejected": -0.6540195345878601, |
|
"logps/chosen": -440.49188232421875, |
|
"logps/rejected": -644.3251342773438, |
|
"loss": 0.4312, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.86978018283844, |
|
"rewards/margins": 2.010561227798462, |
|
"rewards/rejected": -3.8803412914276123, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9189059500959693, |
|
"grad_norm": 10.258131441990278, |
|
"learning_rate": 9.95054336957557e-09, |
|
"logits/chosen": -0.5800519585609436, |
|
"logits/rejected": -0.6037092208862305, |
|
"logps/chosen": -437.89251708984375, |
|
"logps/rejected": -572.6655883789062, |
|
"loss": 0.3879, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8630110025405884, |
|
"rewards/margins": 1.2645984888076782, |
|
"rewards/rejected": -3.1276094913482666, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 14.86589440455775, |
|
"learning_rate": 9.37411721768286e-09, |
|
"logits/chosen": -0.530808687210083, |
|
"logits/rejected": -0.5798245668411255, |
|
"logps/chosen": -473.449951171875, |
|
"logps/rejected": -675.644775390625, |
|
"loss": 0.4163, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.131751298904419, |
|
"rewards/margins": 1.583280324935913, |
|
"rewards/rejected": -3.715031385421753, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9237044145873321, |
|
"grad_norm": 13.05727695798275, |
|
"learning_rate": 8.81457001547392e-09, |
|
"logits/chosen": -0.5503281354904175, |
|
"logits/rejected": -0.533139169216156, |
|
"logps/chosen": -491.100341796875, |
|
"logps/rejected": -626.0603637695312, |
|
"loss": 0.4468, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2812390327453613, |
|
"rewards/margins": 1.2485569715499878, |
|
"rewards/rejected": -3.5297958850860596, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9261036468330134, |
|
"grad_norm": 14.54576174319674, |
|
"learning_rate": 8.271941012961942e-09, |
|
"logits/chosen": -0.48925477266311646, |
|
"logits/rejected": -0.4920194149017334, |
|
"logps/chosen": -403.09722900390625, |
|
"logps/rejected": -654.4439086914062, |
|
"loss": 0.4432, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.928261399269104, |
|
"rewards/margins": 1.9341392517089844, |
|
"rewards/rejected": -3.862400531768799, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9285028790786948, |
|
"grad_norm": 13.945738623677995, |
|
"learning_rate": 7.746268273415568e-09, |
|
"logits/chosen": -0.5374631285667419, |
|
"logits/rejected": -0.5382106900215149, |
|
"logps/chosen": -466.7080993652344, |
|
"logps/rejected": -586.3713989257812, |
|
"loss": 0.4421, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0114636421203613, |
|
"rewards/margins": 0.7434648871421814, |
|
"rewards/rejected": -2.7549285888671875, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9309021113243762, |
|
"grad_norm": 12.078781330743034, |
|
"learning_rate": 7.237588670689076e-09, |
|
"logits/chosen": -0.6346238255500793, |
|
"logits/rejected": -0.696995198726654, |
|
"logps/chosen": -439.1622619628906, |
|
"logps/rejected": -595.7288818359375, |
|
"loss": 0.4174, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9709396362304688, |
|
"rewards/margins": 1.8637230396270752, |
|
"rewards/rejected": -3.834662675857544, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9333013435700576, |
|
"grad_norm": 12.482916834754162, |
|
"learning_rate": 6.745937886635606e-09, |
|
"logits/chosen": -0.6023680567741394, |
|
"logits/rejected": -0.597190797328949, |
|
"logps/chosen": -462.8761291503906, |
|
"logps/rejected": -663.2772827148438, |
|
"loss": 0.42, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8327722549438477, |
|
"rewards/margins": 1.956402063369751, |
|
"rewards/rejected": -3.7891743183135986, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.935700575815739, |
|
"grad_norm": 14.24690648354876, |
|
"learning_rate": 6.271350408604409e-09, |
|
"logits/chosen": -0.5814956426620483, |
|
"logits/rejected": -0.5836547613143921, |
|
"logps/chosen": -371.41156005859375, |
|
"logps/rejected": -554.6315307617188, |
|
"loss": 0.419, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.471068024635315, |
|
"rewards/margins": 1.6207103729248047, |
|
"rewards/rejected": -3.09177827835083, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9380998080614203, |
|
"grad_norm": 13.651182373973166, |
|
"learning_rate": 5.813859527021487e-09, |
|
"logits/chosen": -0.5507432222366333, |
|
"logits/rejected": -0.5853307843208313, |
|
"logps/chosen": -442.532470703125, |
|
"logps/rejected": -599.6609497070312, |
|
"loss": 0.4181, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9738194942474365, |
|
"rewards/margins": 1.8410329818725586, |
|
"rewards/rejected": -3.814852476119995, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 12.561972537735913, |
|
"learning_rate": 5.373497333054616e-09, |
|
"logits/chosen": -0.6120710968971252, |
|
"logits/rejected": -0.620995044708252, |
|
"logps/chosen": -499.99139404296875, |
|
"logps/rejected": -590.181396484375, |
|
"loss": 0.4722, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.298414945602417, |
|
"rewards/margins": 0.9743332862854004, |
|
"rewards/rejected": -3.2727482318878174, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9428982725527831, |
|
"grad_norm": 16.011643842899986, |
|
"learning_rate": 4.950294716362213e-09, |
|
"logits/chosen": -0.5541747212409973, |
|
"logits/rejected": -0.5821543335914612, |
|
"logps/chosen": -511.50970458984375, |
|
"logps/rejected": -614.6409301757812, |
|
"loss": 0.4314, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.2209866046905518, |
|
"rewards/margins": 1.0603018999099731, |
|
"rewards/rejected": -3.2812886238098145, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9452975047984645, |
|
"grad_norm": 10.553547990487088, |
|
"learning_rate": 4.544281362926422e-09, |
|
"logits/chosen": -0.6090785264968872, |
|
"logits/rejected": -0.6422208547592163, |
|
"logps/chosen": -493.9991760253906, |
|
"logps/rejected": -632.8602294921875, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9454662799835205, |
|
"rewards/margins": 1.474827527999878, |
|
"rewards/rejected": -3.4202942848205566, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9476967370441459, |
|
"grad_norm": 12.9216379203511, |
|
"learning_rate": 4.15548575297095e-09, |
|
"logits/chosen": -0.6255580186843872, |
|
"logits/rejected": -0.6386123895645142, |
|
"logps/chosen": -431.75634765625, |
|
"logps/rejected": -608.4300537109375, |
|
"loss": 0.4087, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9927335977554321, |
|
"rewards/margins": 1.8527963161468506, |
|
"rewards/rejected": -3.8455300331115723, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9500959692898272, |
|
"grad_norm": 9.979083154735912, |
|
"learning_rate": 3.7839351589631366e-09, |
|
"logits/chosen": -0.5646272301673889, |
|
"logits/rejected": -0.5582197308540344, |
|
"logps/chosen": -406.14178466796875, |
|
"logps/rejected": -604.2752075195312, |
|
"loss": 0.415, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8224601745605469, |
|
"rewards/margins": 1.4527934789657593, |
|
"rewards/rejected": -3.275254011154175, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9524952015355086, |
|
"grad_norm": 14.72280065382829, |
|
"learning_rate": 3.4296556437010405e-09, |
|
"logits/chosen": -0.6492162942886353, |
|
"logits/rejected": -0.6573163866996765, |
|
"logps/chosen": -392.5443420410156, |
|
"logps/rejected": -550.0157470703125, |
|
"loss": 0.4284, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9494428634643555, |
|
"rewards/margins": 1.5496604442596436, |
|
"rewards/rejected": -3.499103546142578, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.95489443378119, |
|
"grad_norm": 19.52331839763403, |
|
"learning_rate": 3.092672058485124e-09, |
|
"logits/chosen": -0.5937837362289429, |
|
"logits/rejected": -0.5867224931716919, |
|
"logps/chosen": -420.0135192871094, |
|
"logps/rejected": -637.5535888671875, |
|
"loss": 0.4764, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9328027963638306, |
|
"rewards/margins": 1.9535869359970093, |
|
"rewards/rejected": -3.886389970779419, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9572936660268714, |
|
"grad_norm": 15.465373415357526, |
|
"learning_rate": 2.7730080413750356e-09, |
|
"logits/chosen": -0.5144689083099365, |
|
"logits/rejected": -0.5366243720054626, |
|
"logps/chosen": -462.44580078125, |
|
"logps/rejected": -614.26904296875, |
|
"loss": 0.4304, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.9767814874649048, |
|
"rewards/margins": 1.4872825145721436, |
|
"rewards/rejected": -3.464064121246338, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 12.554501395226785, |
|
"learning_rate": 2.4706860155316033e-09, |
|
"logits/chosen": -0.5792466402053833, |
|
"logits/rejected": -0.5846326947212219, |
|
"logps/chosen": -533.231201171875, |
|
"logps/rejected": -670.0733642578125, |
|
"loss": 0.46, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0717945098876953, |
|
"rewards/margins": 1.349689245223999, |
|
"rewards/rejected": -3.4214844703674316, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"eval_logits/chosen": -0.5574566721916199, |
|
"eval_logits/rejected": -0.5789428949356079, |
|
"eval_logps/chosen": -453.795654296875, |
|
"eval_logps/rejected": -623.4196166992188, |
|
"eval_loss": 0.42671090364456177, |
|
"eval_rewards/accuracies": 0.8446428775787354, |
|
"eval_rewards/chosen": -2.0115108489990234, |
|
"eval_rewards/margins": 1.6149202585220337, |
|
"eval_rewards/rejected": -3.6264309883117676, |
|
"eval_runtime": 208.7971, |
|
"eval_samples_per_second": 21.365, |
|
"eval_steps_per_second": 0.335, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9620921305182342, |
|
"grad_norm": 14.306928956925486, |
|
"learning_rate": 2.185727187643843e-09, |
|
"logits/chosen": -0.5993860363960266, |
|
"logits/rejected": -0.6200038194656372, |
|
"logps/chosen": -403.6116943359375, |
|
"logps/rejected": -601.7921752929688, |
|
"loss": 0.4649, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.9371687173843384, |
|
"rewards/margins": 1.8979822397232056, |
|
"rewards/rejected": -3.835150957107544, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9644913627639156, |
|
"grad_norm": 16.135414053750264, |
|
"learning_rate": 1.9181515464413434e-09, |
|
"logits/chosen": -0.5625258088111877, |
|
"logits/rejected": -0.5911010503768921, |
|
"logps/chosen": -544.6771240234375, |
|
"logps/rejected": -725.96142578125, |
|
"loss": 0.3909, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8679109811782837, |
|
"rewards/margins": 1.8002150058746338, |
|
"rewards/rejected": -3.668125867843628, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.966890595009597, |
|
"grad_norm": 15.134608783297615, |
|
"learning_rate": 1.6679778612923302e-09, |
|
"logits/chosen": -0.5708626508712769, |
|
"logits/rejected": -0.5663528442382812, |
|
"logps/chosen": -496.3414001464844, |
|
"logps/rejected": -607.8493041992188, |
|
"loss": 0.403, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.051764965057373, |
|
"rewards/margins": 1.1176114082336426, |
|
"rewards/rejected": -3.1693766117095947, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9692898272552783, |
|
"grad_norm": 15.867680764731526, |
|
"learning_rate": 1.43522368088686e-09, |
|
"logits/chosen": -0.5292009115219116, |
|
"logits/rejected": -0.5536502003669739, |
|
"logps/chosen": -493.24053955078125, |
|
"logps/rejected": -687.3652954101562, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3322641849517822, |
|
"rewards/margins": 1.9033256769180298, |
|
"rewards/rejected": -4.235589504241943, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9716890595009597, |
|
"grad_norm": 18.145967539331632, |
|
"learning_rate": 1.2199053320059993e-09, |
|
"logits/chosen": -0.5495906472206116, |
|
"logits/rejected": -0.5624712109565735, |
|
"logps/chosen": -464.6747131347656, |
|
"logps/rejected": -626.3593139648438, |
|
"loss": 0.4444, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9679105281829834, |
|
"rewards/margins": 1.4822068214416504, |
|
"rewards/rejected": -3.4501171112060547, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.974088291746641, |
|
"grad_norm": 11.736148609957327, |
|
"learning_rate": 1.0220379183764338e-09, |
|
"logits/chosen": -0.6397042870521545, |
|
"logits/rejected": -0.6401645541191101, |
|
"logps/chosen": -382.80255126953125, |
|
"logps/rejected": -585.4246826171875, |
|
"loss": 0.4335, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8114726543426514, |
|
"rewards/margins": 1.9243446588516235, |
|
"rewards/rejected": -3.7358174324035645, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9764875239923224, |
|
"grad_norm": 13.40993494403174, |
|
"learning_rate": 8.416353196111503e-10, |
|
"logits/chosen": -0.531282901763916, |
|
"logits/rejected": -0.5239174365997314, |
|
"logps/chosen": -447.2433166503906, |
|
"logps/rejected": -588.00341796875, |
|
"loss": 0.4917, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.130341053009033, |
|
"rewards/margins": 1.5175530910491943, |
|
"rewards/rejected": -3.6478943824768066, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 14.008572116854983, |
|
"learning_rate": 6.787101902356873e-10, |
|
"logits/chosen": -0.5458533763885498, |
|
"logits/rejected": -0.530588686466217, |
|
"logps/chosen": -478.8675842285156, |
|
"logps/rejected": -640.27783203125, |
|
"loss": 0.4184, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1983494758605957, |
|
"rewards/margins": 1.3467283248901367, |
|
"rewards/rejected": -3.5450775623321533, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9812859884836852, |
|
"grad_norm": 17.14797402633899, |
|
"learning_rate": 5.332739588005953e-10, |
|
"logits/chosen": -0.6215013861656189, |
|
"logits/rejected": -0.6441851854324341, |
|
"logps/chosen": -390.38421630859375, |
|
"logps/rejected": -607.4275512695312, |
|
"loss": 0.4348, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.911041498184204, |
|
"rewards/margins": 1.8259137868881226, |
|
"rewards/rejected": -3.736954927444458, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9836852207293666, |
|
"grad_norm": 14.786463692836161, |
|
"learning_rate": 4.053368270797164e-10, |
|
"logits/chosen": -0.5029186010360718, |
|
"logits/rejected": -0.5265758037567139, |
|
"logps/chosen": -459.8993225097656, |
|
"logps/rejected": -595.8419189453125, |
|
"loss": 0.4479, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.2223143577575684, |
|
"rewards/margins": 1.3676447868347168, |
|
"rewards/rejected": -3.589958667755127, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.986084452975048, |
|
"grad_norm": 10.752700482691958, |
|
"learning_rate": 2.949077693545354e-10, |
|
"logits/chosen": -0.46297192573547363, |
|
"logits/rejected": -0.522523045539856, |
|
"logps/chosen": -502.4552307128906, |
|
"logps/rejected": -644.851318359375, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.2377562522888184, |
|
"rewards/margins": 1.0717887878417969, |
|
"rewards/rejected": -3.3095450401306152, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9884836852207294, |
|
"grad_norm": 13.109993080936734, |
|
"learning_rate": 2.0199453178471047e-10, |
|
"logits/chosen": -0.5104750394821167, |
|
"logits/rejected": -0.5807961225509644, |
|
"logps/chosen": -529.2632446289062, |
|
"logps/rejected": -605.51416015625, |
|
"loss": 0.4171, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1927380561828613, |
|
"rewards/margins": 1.1302746534347534, |
|
"rewards/rejected": -3.323012590408325, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9908829174664108, |
|
"grad_norm": 14.68142360061518, |
|
"learning_rate": 1.266036318647301e-10, |
|
"logits/chosen": -0.5655652284622192, |
|
"logits/rejected": -0.5766940116882324, |
|
"logps/chosen": -503.4109802246094, |
|
"logps/rejected": -674.8923950195312, |
|
"loss": 0.4159, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8927507400512695, |
|
"rewards/margins": 1.9182952642440796, |
|
"rewards/rejected": -3.8110461235046387, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9932821497120922, |
|
"grad_norm": 16.922904240171835, |
|
"learning_rate": 6.874035796672339e-11, |
|
"logits/chosen": -0.5960877537727356, |
|
"logits/rejected": -0.5835133194923401, |
|
"logps/chosen": -456.58172607421875, |
|
"logps/rejected": -612.15087890625, |
|
"loss": 0.4225, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7158613204956055, |
|
"rewards/margins": 2.088901996612549, |
|
"rewards/rejected": -3.8047633171081543, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9956813819577736, |
|
"grad_norm": 16.596879517738124, |
|
"learning_rate": 2.8408768969423458e-11, |
|
"logits/chosen": -0.6005167961120605, |
|
"logits/rejected": -0.6138831377029419, |
|
"logps/chosen": -461.8202209472656, |
|
"logps/rejected": -612.9940185546875, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8562465906143188, |
|
"rewards/margins": 1.3878483772277832, |
|
"rewards/rejected": -3.2440948486328125, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 15.505557355450042, |
|
"learning_rate": 5.611693973617271e-12, |
|
"logits/chosen": -0.5526952743530273, |
|
"logits/rejected": -0.5355725288391113, |
|
"logps/chosen": -412.329345703125, |
|
"logps/rejected": -580.66357421875, |
|
"loss": 0.4472, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9485257863998413, |
|
"rewards/margins": 1.4926297664642334, |
|
"rewards/rejected": -3.4411556720733643, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4168, |
|
"total_flos": 0.0, |
|
"train_loss": 0.49609584714538074, |
|
"train_runtime": 16148.8615, |
|
"train_samples_per_second": 8.259, |
|
"train_steps_per_second": 0.258 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|