|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997167941093175, |
|
"eval_steps": 100000, |
|
"global_step": 1765, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005664117813650524, |
|
"grad_norm": 8.920203173210156, |
|
"learning_rate": 2.824858757062147e-09, |
|
"logits/chosen": 0.09167595952749252, |
|
"logits/rejected": 0.08131548762321472, |
|
"logps/chosen": -324.3444519042969, |
|
"logps/rejected": -319.4935607910156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005664117813650524, |
|
"grad_norm": 9.052636069881236, |
|
"learning_rate": 2.8248587570621467e-08, |
|
"logits/chosen": -0.11684032529592514, |
|
"logits/rejected": -0.11305296421051025, |
|
"logps/chosen": -549.3125, |
|
"logps/rejected": -521.6323852539062, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.00030700574279762805, |
|
"rewards/margins": 0.00035680277505889535, |
|
"rewards/rejected": -4.9797094106907025e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.011328235627301049, |
|
"grad_norm": 9.28894369536706, |
|
"learning_rate": 5.6497175141242935e-08, |
|
"logits/chosen": 0.052977461367845535, |
|
"logits/rejected": 0.03228786215186119, |
|
"logps/chosen": -330.4373779296875, |
|
"logps/rejected": -324.8442077636719, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0014236660208553076, |
|
"rewards/margins": -0.0023983772844076157, |
|
"rewards/rejected": 0.000974711321759969, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.016992353440951572, |
|
"grad_norm": 8.995558692774207, |
|
"learning_rate": 8.47457627118644e-08, |
|
"logits/chosen": -0.1682974398136139, |
|
"logits/rejected": -0.17686393857002258, |
|
"logps/chosen": -577.6813354492188, |
|
"logps/rejected": -540.6315307617188, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.00039245429798029363, |
|
"rewards/margins": -0.0011196346022188663, |
|
"rewards/rejected": 0.0015120886964723468, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.022656471254602097, |
|
"grad_norm": 7.350404748510338, |
|
"learning_rate": 1.1299435028248587e-07, |
|
"logits/chosen": -0.004499862901866436, |
|
"logits/rejected": 0.0006781384581699967, |
|
"logps/chosen": -402.2951354980469, |
|
"logps/rejected": -402.3536071777344, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.001362536335363984, |
|
"rewards/margins": 2.0598527044057846e-05, |
|
"rewards/rejected": 0.0013419378083199263, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02832058906825262, |
|
"grad_norm": 9.877657993004474, |
|
"learning_rate": 1.4124293785310734e-07, |
|
"logits/chosen": -0.17724382877349854, |
|
"logits/rejected": -0.14357277750968933, |
|
"logps/chosen": -485.1160583496094, |
|
"logps/rejected": -462.67364501953125, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.004250611178576946, |
|
"rewards/margins": -0.0006281146197579801, |
|
"rewards/rejected": 0.0048787253908813, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.033984706881903144, |
|
"grad_norm": 9.281132819647825, |
|
"learning_rate": 1.694915254237288e-07, |
|
"logits/chosen": -0.042291849851608276, |
|
"logits/rejected": -0.05441279336810112, |
|
"logps/chosen": -504.1480407714844, |
|
"logps/rejected": -505.86505126953125, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.011762259528040886, |
|
"rewards/margins": 0.00283249793574214, |
|
"rewards/rejected": 0.008929761126637459, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03964882469555367, |
|
"grad_norm": 9.053471169113529, |
|
"learning_rate": 1.9774011299435027e-07, |
|
"logits/chosen": -0.13191935420036316, |
|
"logits/rejected": -0.09768908470869064, |
|
"logps/chosen": -506.79461669921875, |
|
"logps/rejected": -472.0877380371094, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.019224129617214203, |
|
"rewards/margins": 0.0018368273740634322, |
|
"rewards/rejected": 0.017387302592396736, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.045312942509204195, |
|
"grad_norm": 8.967160357325453, |
|
"learning_rate": 2.2598870056497174e-07, |
|
"logits/chosen": -0.011566092260181904, |
|
"logits/rejected": -0.0008640438318252563, |
|
"logps/chosen": -416.4815979003906, |
|
"logps/rejected": -403.4819030761719, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.02151104062795639, |
|
"rewards/margins": -0.0011001474922522902, |
|
"rewards/rejected": 0.022611189633607864, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05097706032285471, |
|
"grad_norm": 11.210156815835907, |
|
"learning_rate": 2.542372881355932e-07, |
|
"logits/chosen": -0.09484784305095673, |
|
"logits/rejected": -0.10812617838382721, |
|
"logps/chosen": -490.9479064941406, |
|
"logps/rejected": -515.5724487304688, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.03245864063501358, |
|
"rewards/margins": -0.0015371677000075579, |
|
"rewards/rejected": 0.03399580717086792, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05664117813650524, |
|
"grad_norm": 8.92543373085589, |
|
"learning_rate": 2.824858757062147e-07, |
|
"logits/chosen": -0.03574278578162193, |
|
"logits/rejected": -0.04354934021830559, |
|
"logps/chosen": -447.6690979003906, |
|
"logps/rejected": -444.5801696777344, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.05052490904927254, |
|
"rewards/margins": 0.0031251353211700916, |
|
"rewards/rejected": 0.04739977791905403, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06230529595015576, |
|
"grad_norm": 8.246981680201595, |
|
"learning_rate": 3.1073446327683617e-07, |
|
"logits/chosen": 0.07561548054218292, |
|
"logits/rejected": 0.05347290635108948, |
|
"logps/chosen": -462.77081298828125, |
|
"logps/rejected": -458.2320251464844, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.07939761877059937, |
|
"rewards/margins": -0.00035358889726921916, |
|
"rewards/rejected": 0.07975120842456818, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06796941376380629, |
|
"grad_norm": 7.813418682487144, |
|
"learning_rate": 3.389830508474576e-07, |
|
"logits/chosen": 0.0744490697979927, |
|
"logits/rejected": 0.06200702115893364, |
|
"logps/chosen": -463.47918701171875, |
|
"logps/rejected": -434.5580139160156, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.10037367045879364, |
|
"rewards/margins": 0.022612569853663445, |
|
"rewards/rejected": 0.07776109874248505, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0736335315774568, |
|
"grad_norm": 8.352060171431797, |
|
"learning_rate": 3.672316384180791e-07, |
|
"logits/chosen": 0.09073454886674881, |
|
"logits/rejected": 0.05861488729715347, |
|
"logps/chosen": -421.78839111328125, |
|
"logps/rejected": -408.28045654296875, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.10209091752767563, |
|
"rewards/margins": 0.013570049777626991, |
|
"rewards/rejected": 0.08852086216211319, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07929764939110734, |
|
"grad_norm": 7.350219776962672, |
|
"learning_rate": 3.9548022598870054e-07, |
|
"logits/chosen": 0.0739302709698677, |
|
"logits/rejected": 0.08551234006881714, |
|
"logps/chosen": -451.78851318359375, |
|
"logps/rejected": -444.747802734375, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.13255102932453156, |
|
"rewards/margins": 0.01697494462132454, |
|
"rewards/rejected": 0.11557607352733612, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08496176720475786, |
|
"grad_norm": 7.368496064412902, |
|
"learning_rate": 4.23728813559322e-07, |
|
"logits/chosen": 0.01890203356742859, |
|
"logits/rejected": 0.05110060051083565, |
|
"logps/chosen": -414.914306640625, |
|
"logps/rejected": -387.0083312988281, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.13907325267791748, |
|
"rewards/margins": 0.002160780131816864, |
|
"rewards/rejected": 0.1369124799966812, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09062588501840839, |
|
"grad_norm": 7.548722136067434, |
|
"learning_rate": 4.519774011299435e-07, |
|
"logits/chosen": -0.04512980952858925, |
|
"logits/rejected": -0.008629368618130684, |
|
"logps/chosen": -533.8591918945312, |
|
"logps/rejected": -540.7676391601562, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.18351757526397705, |
|
"rewards/margins": 0.02642633020877838, |
|
"rewards/rejected": 0.15709123015403748, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09629000283205891, |
|
"grad_norm": 7.70075563520064, |
|
"learning_rate": 4.80225988700565e-07, |
|
"logits/chosen": 0.03039904311299324, |
|
"logits/rejected": 0.04146042466163635, |
|
"logps/chosen": -396.4923400878906, |
|
"logps/rejected": -347.1358642578125, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.165240079164505, |
|
"rewards/margins": 0.061240702867507935, |
|
"rewards/rejected": 0.10399937629699707, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.10195412064570943, |
|
"grad_norm": 5.761017969269167, |
|
"learning_rate": 4.999955969867048e-07, |
|
"logits/chosen": -0.025510674342513084, |
|
"logits/rejected": -0.03141719102859497, |
|
"logps/chosen": -445.02813720703125, |
|
"logps/rejected": -441.5028381347656, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.2438725233078003, |
|
"rewards/margins": 0.04211033508181572, |
|
"rewards/rejected": 0.20176219940185547, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.10761823845935996, |
|
"grad_norm": 8.147395627823983, |
|
"learning_rate": 4.999173255092139e-07, |
|
"logits/chosen": -0.12435302883386612, |
|
"logits/rejected": -0.05531524866819382, |
|
"logps/chosen": -551.278076171875, |
|
"logps/rejected": -506.13812255859375, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.288453072309494, |
|
"rewards/margins": 0.029089733958244324, |
|
"rewards/rejected": 0.2593633830547333, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11328235627301048, |
|
"grad_norm": 6.8306403873848875, |
|
"learning_rate": 4.997412445518907e-07, |
|
"logits/chosen": 0.037917762994766235, |
|
"logits/rejected": 0.05893224477767944, |
|
"logps/chosen": -392.0666198730469, |
|
"logps/rejected": -373.0283203125, |
|
"loss": 0.6819, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.21145746111869812, |
|
"rewards/margins": 0.06931839883327484, |
|
"rewards/rejected": 0.14213906228542328, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11894647408666101, |
|
"grad_norm": 6.633087457971285, |
|
"learning_rate": 4.994674230270714e-07, |
|
"logits/chosen": -0.04183816909790039, |
|
"logits/rejected": -0.008494583889842033, |
|
"logps/chosen": -475.88885498046875, |
|
"logps/rejected": -419.31805419921875, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2550015449523926, |
|
"rewards/margins": 0.057939767837524414, |
|
"rewards/rejected": 0.19706180691719055, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12461059190031153, |
|
"grad_norm": 6.222645381570541, |
|
"learning_rate": 4.990959680995591e-07, |
|
"logits/chosen": 0.019654836505651474, |
|
"logits/rejected": -0.009270086884498596, |
|
"logps/chosen": -439.2235412597656, |
|
"logps/rejected": -480.212158203125, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.29933345317840576, |
|
"rewards/margins": 0.08341382443904877, |
|
"rewards/rejected": 0.21591965854167938, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13027470971396204, |
|
"grad_norm": 7.247831481359495, |
|
"learning_rate": 4.986270251446819e-07, |
|
"logits/chosen": -0.016120824962854385, |
|
"logits/rejected": 0.020067866891622543, |
|
"logps/chosen": -443.7124938964844, |
|
"logps/rejected": -426.8396911621094, |
|
"loss": 0.6708, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.24699635803699493, |
|
"rewards/margins": 0.05648481845855713, |
|
"rewards/rejected": 0.190511554479599, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13593882752761258, |
|
"grad_norm": 7.731105957944728, |
|
"learning_rate": 4.980607776913984e-07, |
|
"logits/chosen": -0.01023712195456028, |
|
"logits/rejected": 0.029122397303581238, |
|
"logps/chosen": -424.93963623046875, |
|
"logps/rejected": -387.46197509765625, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2441348135471344, |
|
"rewards/margins": 0.052381645888090134, |
|
"rewards/rejected": 0.19175319373607635, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1416029453412631, |
|
"grad_norm": 7.76875586139277, |
|
"learning_rate": 4.973974473504705e-07, |
|
"logits/chosen": -0.1571817398071289, |
|
"logits/rejected": -0.13499276340007782, |
|
"logps/chosen": -498.03692626953125, |
|
"logps/rejected": -460.9757385253906, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.2809702754020691, |
|
"rewards/margins": 0.04384630173444748, |
|
"rewards/rejected": 0.23712392151355743, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1472670631549136, |
|
"grad_norm": 7.046439253317865, |
|
"learning_rate": 4.966372937277314e-07, |
|
"logits/chosen": 0.008683884516358376, |
|
"logits/rejected": 0.03034578636288643, |
|
"logps/chosen": -427.47283935546875, |
|
"logps/rejected": -423.66033935546875, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.23050042986869812, |
|
"rewards/margins": 0.05516275018453598, |
|
"rewards/rejected": 0.17533767223358154, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.15293118096856415, |
|
"grad_norm": 8.273099722403256, |
|
"learning_rate": 4.957806143224855e-07, |
|
"logits/chosen": -0.02415129914879799, |
|
"logits/rejected": -0.027190949767827988, |
|
"logps/chosen": -503.2647399902344, |
|
"logps/rejected": -487.62213134765625, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.2844032049179077, |
|
"rewards/margins": 0.020670583471655846, |
|
"rewards/rejected": 0.2637326121330261, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15859529878221468, |
|
"grad_norm": 6.632746838428199, |
|
"learning_rate": 4.94827744411076e-07, |
|
"logits/chosen": -0.049058981239795685, |
|
"logits/rejected": -0.08267603814601898, |
|
"logps/chosen": -398.5274658203125, |
|
"logps/rejected": -403.6279296875, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.21822825074195862, |
|
"rewards/margins": 0.038919974118471146, |
|
"rewards/rejected": 0.17930825054645538, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.16425941659586518, |
|
"grad_norm": 7.218507745358829, |
|
"learning_rate": 4.937790569156689e-07, |
|
"logits/chosen": 0.029155444353818893, |
|
"logits/rejected": 0.05234605073928833, |
|
"logps/chosen": -451.5755920410156, |
|
"logps/rejected": -424.02484130859375, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.23795238137245178, |
|
"rewards/margins": 0.07340480387210846, |
|
"rewards/rejected": 0.16454759240150452, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16992353440951571, |
|
"grad_norm": 7.6926913086077935, |
|
"learning_rate": 4.926349622583038e-07, |
|
"logits/chosen": -0.05263520032167435, |
|
"logits/rejected": -0.0020814030431210995, |
|
"logps/chosen": -406.317626953125, |
|
"logps/rejected": -372.07769775390625, |
|
"loss": 0.6738, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.19559504091739655, |
|
"rewards/margins": 0.06914862990379333, |
|
"rewards/rejected": 0.1264464110136032, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17558765222316625, |
|
"grad_norm": 6.5222551125823935, |
|
"learning_rate": 4.913959082002677e-07, |
|
"logits/chosen": -0.0497591607272625, |
|
"logits/rejected": 0.0054474459029734135, |
|
"logps/chosen": -370.96685791015625, |
|
"logps/rejected": -358.66912841796875, |
|
"loss": 0.6678, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.2062123566865921, |
|
"rewards/margins": 0.04517778381705284, |
|
"rewards/rejected": 0.16103455424308777, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.18125177003681678, |
|
"grad_norm": 6.40916314482675, |
|
"learning_rate": 4.900623796668559e-07, |
|
"logits/chosen": -0.12707683444023132, |
|
"logits/rejected": -0.13572077453136444, |
|
"logps/chosen": -479.86346435546875, |
|
"logps/rejected": -423.7693786621094, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.21128025650978088, |
|
"rewards/margins": 0.09720635414123535, |
|
"rewards/rejected": 0.11407390981912613, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.18691588785046728, |
|
"grad_norm": 6.645268185687199, |
|
"learning_rate": 4.886348985575884e-07, |
|
"logits/chosen": -0.10542762279510498, |
|
"logits/rejected": -0.12311786413192749, |
|
"logps/chosen": -457.22589111328125, |
|
"logps/rejected": -476.0166015625, |
|
"loss": 0.6589, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.19297997653484344, |
|
"rewards/margins": 0.037894029170274734, |
|
"rewards/rejected": 0.15508592128753662, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.19258000566411781, |
|
"grad_norm": 8.79307170795037, |
|
"learning_rate": 4.871140235419551e-07, |
|
"logits/chosen": 0.011003658175468445, |
|
"logits/rejected": -0.03487353399395943, |
|
"logps/chosen": -303.2231750488281, |
|
"logps/rejected": -321.04071044921875, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.1460239738225937, |
|
"rewards/margins": 0.10739554464817047, |
|
"rewards/rejected": 0.03862842172384262, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.19824412347776835, |
|
"grad_norm": 6.624600376940614, |
|
"learning_rate": 4.85500349840771e-07, |
|
"logits/chosen": 0.048854436725378036, |
|
"logits/rejected": 0.012037856504321098, |
|
"logps/chosen": -453.3700256347656, |
|
"logps/rejected": -483.99005126953125, |
|
"loss": 0.6697, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.20664839446544647, |
|
"rewards/margins": 0.08074460178613663, |
|
"rewards/rejected": 0.12590381503105164, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.20390824129141885, |
|
"grad_norm": 6.596193590739345, |
|
"learning_rate": 4.837945089932261e-07, |
|
"logits/chosen": 0.11997655779123306, |
|
"logits/rejected": 0.18299253284931183, |
|
"logps/chosen": -460.0072326660156, |
|
"logps/rejected": -421.7837829589844, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.24212124943733215, |
|
"rewards/margins": 0.09361882507801056, |
|
"rewards/rejected": 0.1485023945569992, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.20957235910506938, |
|
"grad_norm": 7.905418678010064, |
|
"learning_rate": 4.819971686097217e-07, |
|
"logits/chosen": 0.026029860600829124, |
|
"logits/rejected": 0.06209796667098999, |
|
"logps/chosen": -451.7420349121094, |
|
"logps/rejected": -457.9381408691406, |
|
"loss": 0.6714, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.13577082753181458, |
|
"rewards/margins": 0.06413926184177399, |
|
"rewards/rejected": 0.07163156569004059, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.21523647691871992, |
|
"grad_norm": 8.79182824488632, |
|
"learning_rate": 4.801090321105896e-07, |
|
"logits/chosen": -0.1300145834684372, |
|
"logits/rejected": -0.11859152466058731, |
|
"logps/chosen": -547.6380004882812, |
|
"logps/rejected": -494.87689208984375, |
|
"loss": 0.6534, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.23817840218544006, |
|
"rewards/margins": 0.1310252845287323, |
|
"rewards/rejected": 0.10715309530496597, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.22090059473237042, |
|
"grad_norm": 7.386220719656124, |
|
"learning_rate": 4.781308384507959e-07, |
|
"logits/chosen": -0.09379091113805771, |
|
"logits/rejected": -0.034805141389369965, |
|
"logps/chosen": -480.873779296875, |
|
"logps/rejected": -466.55517578125, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.2688544988632202, |
|
"rewards/margins": 0.1596641093492508, |
|
"rewards/rejected": 0.10919040441513062, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.22656471254602095, |
|
"grad_norm": 8.951091336105508, |
|
"learning_rate": 4.760633618307386e-07, |
|
"logits/chosen": -0.03577841818332672, |
|
"logits/rejected": -0.03505768999457359, |
|
"logps/chosen": -410.76617431640625, |
|
"logps/rejected": -404.79296875, |
|
"loss": 0.6672, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.12271402776241302, |
|
"rewards/margins": 0.09359300136566162, |
|
"rewards/rejected": 0.02912103570997715, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23222883035967148, |
|
"grad_norm": 7.867464061099172, |
|
"learning_rate": 4.7390741139325063e-07, |
|
"logits/chosen": -0.13187697529792786, |
|
"logits/rejected": -0.1505255401134491, |
|
"logps/chosen": -442.73736572265625, |
|
"logps/rejected": -453.91497802734375, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.1331796944141388, |
|
"rewards/margins": 0.108833447098732, |
|
"rewards/rejected": 0.024346251040697098, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.23789294817332202, |
|
"grad_norm": 8.897905773307174, |
|
"learning_rate": 4.7166383090692797e-07, |
|
"logits/chosen": -0.05865805596113205, |
|
"logits/rejected": -0.04420238733291626, |
|
"logps/chosen": -510.46112060546875, |
|
"logps/rejected": -491.46368408203125, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.13560162484645844, |
|
"rewards/margins": 0.12260621786117554, |
|
"rewards/rejected": 0.01299543958157301, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.24355706598697252, |
|
"grad_norm": 8.480862384495287, |
|
"learning_rate": 4.693334984359059e-07, |
|
"logits/chosen": -0.03462984040379524, |
|
"logits/rejected": -0.004300132393836975, |
|
"logps/chosen": -276.5245666503906, |
|
"logps/rejected": -281.5501708984375, |
|
"loss": 0.6735, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.024655651301145554, |
|
"rewards/margins": 0.03693979233503342, |
|
"rewards/rejected": -0.012284127995371819, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.24922118380062305, |
|
"grad_norm": 8.050427726048714, |
|
"learning_rate": 4.6691732599621365e-07, |
|
"logits/chosen": -0.10734639316797256, |
|
"logits/rejected": -0.11525185406208038, |
|
"logps/chosen": -359.0762023925781, |
|
"logps/rejected": -346.8539733886719, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06632296741008759, |
|
"rewards/margins": 0.0617077462375164, |
|
"rewards/rejected": 0.004615230951458216, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2548853016142736, |
|
"grad_norm": 7.895428149064327, |
|
"learning_rate": 4.6441625919884083e-07, |
|
"logits/chosen": -0.19662366807460785, |
|
"logits/rejected": -0.15727293491363525, |
|
"logps/chosen": -459.8854064941406, |
|
"logps/rejected": -464.7665100097656, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.1074344739317894, |
|
"rewards/margins": 0.06199081987142563, |
|
"rewards/rejected": 0.04544364660978317, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.2605494194279241, |
|
"grad_norm": 9.122379390128415, |
|
"learning_rate": 4.6183127687965634e-07, |
|
"logits/chosen": -0.242090106010437, |
|
"logits/rejected": -0.23871174454689026, |
|
"logps/chosen": -543.6893920898438, |
|
"logps/rejected": -521.69677734375, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.136667862534523, |
|
"rewards/margins": 0.09476637840270996, |
|
"rewards/rejected": 0.04190149903297424, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.26621353724157465, |
|
"grad_norm": 8.854419843076306, |
|
"learning_rate": 4.5916339071632407e-07, |
|
"logits/chosen": -0.1867067664861679, |
|
"logits/rejected": -0.18054267764091492, |
|
"logps/chosen": -450.6224670410156, |
|
"logps/rejected": -452.8955078125, |
|
"loss": 0.6538, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.014217564836144447, |
|
"rewards/margins": 0.020388774573802948, |
|
"rewards/rejected": -0.0061712078750133514, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.27187765505522515, |
|
"grad_norm": 7.9942888923716495, |
|
"learning_rate": 4.564136448323651e-07, |
|
"logits/chosen": -0.18171334266662598, |
|
"logits/rejected": -0.2345585823059082, |
|
"logps/chosen": -452.36871337890625, |
|
"logps/rejected": -476.38970947265625, |
|
"loss": 0.6448, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.09399638324975967, |
|
"rewards/margins": 0.21560397744178772, |
|
"rewards/rejected": -0.12160757929086685, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.27754177286887566, |
|
"grad_norm": 7.356252784701507, |
|
"learning_rate": 4.535831153885219e-07, |
|
"logits/chosen": -0.13978612422943115, |
|
"logits/rejected": -0.05468880012631416, |
|
"logps/chosen": -514.1116943359375, |
|
"logps/rejected": -457.27496337890625, |
|
"loss": 0.6582, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.011979525908827782, |
|
"rewards/margins": 0.09640632569789886, |
|
"rewards/rejected": -0.10838586091995239, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2832058906825262, |
|
"grad_norm": 8.844130628791795, |
|
"learning_rate": 4.5067291016158415e-07, |
|
"logits/chosen": -0.19480007886886597, |
|
"logits/rejected": -0.19345858693122864, |
|
"logps/chosen": -500.5552673339844, |
|
"logps/rejected": -494.234130859375, |
|
"loss": 0.6411, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.0037498758174479008, |
|
"rewards/margins": 0.25029653310775757, |
|
"rewards/rejected": -0.2540464401245117, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2888700084961767, |
|
"grad_norm": 8.61728399806357, |
|
"learning_rate": 4.476841681108412e-07, |
|
"logits/chosen": 0.1435726284980774, |
|
"logits/rejected": 0.17776526510715485, |
|
"logps/chosen": -373.3977355957031, |
|
"logps/rejected": -370.46435546875, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.09569720178842545, |
|
"rewards/margins": 0.2144606113433838, |
|
"rewards/rejected": -0.31015780568122864, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2945341263098272, |
|
"grad_norm": 8.533003411384605, |
|
"learning_rate": 4.4461805893233056e-07, |
|
"logits/chosen": -0.07490365207195282, |
|
"logits/rejected": -0.0622992217540741, |
|
"logps/chosen": -473.82781982421875, |
|
"logps/rejected": -462.300537109375, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.19030603766441345, |
|
"rewards/margins": 0.06755149364471436, |
|
"rewards/rejected": -0.2578575015068054, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3001982441234778, |
|
"grad_norm": 7.979882700781325, |
|
"learning_rate": 4.414757826010569e-07, |
|
"logits/chosen": -0.32479414343833923, |
|
"logits/rejected": -0.295619934797287, |
|
"logps/chosen": -658.0303955078125, |
|
"logps/rejected": -624.2590942382812, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.11945202201604843, |
|
"rewards/margins": 0.11993242800235748, |
|
"rewards/rejected": -0.2393844574689865, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.3058623619371283, |
|
"grad_norm": 8.080193283084418, |
|
"learning_rate": 4.3825856890136127e-07, |
|
"logits/chosen": -0.06019747257232666, |
|
"logits/rejected": -0.047830767929553986, |
|
"logps/chosen": -387.4117736816406, |
|
"logps/rejected": -392.5433349609375, |
|
"loss": 0.6472, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.19570650160312653, |
|
"rewards/margins": 0.10248363018035889, |
|
"rewards/rejected": -0.2981901466846466, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.3115264797507788, |
|
"grad_norm": 7.907791715789903, |
|
"learning_rate": 4.3496767694562337e-07, |
|
"logits/chosen": -0.16734859347343445, |
|
"logits/rejected": -0.15815582871437073, |
|
"logps/chosen": -517.5618896484375, |
|
"logps/rejected": -519.4200439453125, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.18276867270469666, |
|
"rewards/margins": 0.1622096300125122, |
|
"rewards/rejected": -0.34497830271720886, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.31719059756442936, |
|
"grad_norm": 12.308275450379226, |
|
"learning_rate": 4.316043946814865e-07, |
|
"logits/chosen": -0.1519699990749359, |
|
"logits/rejected": -0.07414064556360245, |
|
"logps/chosen": -554.9448852539062, |
|
"logps/rejected": -555.5247802734375, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.12103432416915894, |
|
"rewards/margins": 0.14699925482273102, |
|
"rewards/rejected": -0.26803356409072876, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.32285471537807986, |
|
"grad_norm": 9.151118233428134, |
|
"learning_rate": 4.281700383877963e-07, |
|
"logits/chosen": -0.07585703581571579, |
|
"logits/rejected": -0.07688557356595993, |
|
"logps/chosen": -523.4305419921875, |
|
"logps/rejected": -514.0325927734375, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2873981297016144, |
|
"rewards/margins": 0.24022600054740906, |
|
"rewards/rejected": -0.5276241302490234, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.32851883319173036, |
|
"grad_norm": 10.100605813404021, |
|
"learning_rate": 4.2466595215945304e-07, |
|
"logits/chosen": -0.11299272626638412, |
|
"logits/rejected": -0.11114968359470367, |
|
"logps/chosen": -460.5335998535156, |
|
"logps/rejected": -500.4085388183594, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.37247395515441895, |
|
"rewards/margins": 0.21001112461090088, |
|
"rewards/rejected": -0.5824850797653198, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3341829510053809, |
|
"grad_norm": 7.561427128198726, |
|
"learning_rate": 4.21093507381376e-07, |
|
"logits/chosen": -0.15097267925739288, |
|
"logits/rejected": -0.17012974619865417, |
|
"logps/chosen": -486.6136169433594, |
|
"logps/rejected": -479.129150390625, |
|
"loss": 0.6412, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2694224417209625, |
|
"rewards/margins": 0.13754698634147644, |
|
"rewards/rejected": -0.4069693982601166, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.33984706881903143, |
|
"grad_norm": 9.803340241849913, |
|
"learning_rate": 4.1745410219178846e-07, |
|
"logits/chosen": -0.16447195410728455, |
|
"logits/rejected": -0.1477648913860321, |
|
"logps/chosen": -575.5458984375, |
|
"logps/rejected": -575.5802001953125, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.302044153213501, |
|
"rewards/margins": 0.1591712236404419, |
|
"rewards/rejected": -0.4612153470516205, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.34551118663268193, |
|
"grad_norm": 8.394709669140541, |
|
"learning_rate": 4.137491609350322e-07, |
|
"logits/chosen": -0.2764771282672882, |
|
"logits/rejected": -0.25419965386390686, |
|
"logps/chosen": -449.9400329589844, |
|
"logps/rejected": -436.87664794921875, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.33376139402389526, |
|
"rewards/margins": 0.040792159736156464, |
|
"rewards/rejected": -0.3745535612106323, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3511753044463325, |
|
"grad_norm": 10.478475585898996, |
|
"learning_rate": 4.099801336041255e-07, |
|
"logits/chosen": -0.09448835998773575, |
|
"logits/rejected": -0.1009138971567154, |
|
"logps/chosen": -545.4580078125, |
|
"logps/rejected": -574.4044799804688, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4826622009277344, |
|
"rewards/margins": 0.22007235884666443, |
|
"rewards/rejected": -0.7027345895767212, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.356839422259983, |
|
"grad_norm": 9.229820112984584, |
|
"learning_rate": 4.0614849527328334e-07, |
|
"logits/chosen": -0.1743849813938141, |
|
"logits/rejected": -0.11722008883953094, |
|
"logps/chosen": -528.2041625976562, |
|
"logps/rejected": -509.2850646972656, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.43195396661758423, |
|
"rewards/margins": 0.06749050319194794, |
|
"rewards/rejected": -0.4994444251060486, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.36250354007363356, |
|
"grad_norm": 14.872985983217255, |
|
"learning_rate": 4.022557455206211e-07, |
|
"logits/chosen": -0.1340872049331665, |
|
"logits/rejected": -0.18022653460502625, |
|
"logps/chosen": -468.09320068359375, |
|
"logps/rejected": -506.20281982421875, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4429725110530853, |
|
"rewards/margins": 0.22569486498832703, |
|
"rewards/rejected": -0.6686673760414124, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.36816765788728406, |
|
"grad_norm": 9.563384752062458, |
|
"learning_rate": 3.9830340784126935e-07, |
|
"logits/chosen": 0.0594959631562233, |
|
"logits/rejected": 0.06747709214687347, |
|
"logps/chosen": -519.6448974609375, |
|
"logps/rejected": -527.3829345703125, |
|
"loss": 0.6339, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.602764368057251, |
|
"rewards/margins": 0.2741518020629883, |
|
"rewards/rejected": -0.8769161105155945, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.37383177570093457, |
|
"grad_norm": 9.321968230497752, |
|
"learning_rate": 3.942930290511272e-07, |
|
"logits/chosen": -0.1886819303035736, |
|
"logits/rejected": -0.15237857401371002, |
|
"logps/chosen": -434.3822326660156, |
|
"logps/rejected": -435.7581481933594, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.34810423851013184, |
|
"rewards/margins": 0.15688088536262512, |
|
"rewards/rejected": -0.5049852132797241, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3794958935145851, |
|
"grad_norm": 10.82596879655571, |
|
"learning_rate": 3.902261786814889e-07, |
|
"logits/chosen": 0.005353009793907404, |
|
"logits/rejected": 0.0019110903376713395, |
|
"logps/chosen": -452.5858459472656, |
|
"logps/rejected": -470.3968811035156, |
|
"loss": 0.656, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4791645109653473, |
|
"rewards/margins": 0.18721643090248108, |
|
"rewards/rejected": -0.6663809418678284, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.38516001132823563, |
|
"grad_norm": 11.466215492392923, |
|
"learning_rate": 3.8610444836478097e-07, |
|
"logits/chosen": -0.06002754718065262, |
|
"logits/rejected": -0.024013454094529152, |
|
"logps/chosen": -430.5911560058594, |
|
"logps/rejected": -400.8475646972656, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.42653408646583557, |
|
"rewards/margins": 0.1883515566587448, |
|
"rewards/rejected": -0.6148856282234192, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.39082412914188613, |
|
"grad_norm": 9.942937076630836, |
|
"learning_rate": 3.8192945121164886e-07, |
|
"logits/chosen": -0.12901607155799866, |
|
"logits/rejected": -0.12658382952213287, |
|
"logps/chosen": -471.71307373046875, |
|
"logps/rejected": -470.94598388671875, |
|
"loss": 0.6208, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.38932669162750244, |
|
"rewards/margins": 0.18685248494148254, |
|
"rewards/rejected": -0.5761792063713074, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3964882469555367, |
|
"grad_norm": 10.327977471567648, |
|
"learning_rate": 3.777028211796386e-07, |
|
"logits/chosen": -0.1806946098804474, |
|
"logits/rejected": -0.1935458481311798, |
|
"logps/chosen": -635.4529418945312, |
|
"logps/rejected": -643.0109252929688, |
|
"loss": 0.656, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6373857855796814, |
|
"rewards/margins": 0.19065091013908386, |
|
"rewards/rejected": -0.8280366659164429, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4021523647691872, |
|
"grad_norm": 9.863883005146906, |
|
"learning_rate": 3.734262124337185e-07, |
|
"logits/chosen": -0.036530423909425735, |
|
"logits/rejected": 0.05539344623684883, |
|
"logps/chosen": -425.04248046875, |
|
"logps/rejected": -418.19989013671875, |
|
"loss": 0.6271, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5510110259056091, |
|
"rewards/margins": 0.2016453742980957, |
|
"rewards/rejected": -0.7526563405990601, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.4078164825828377, |
|
"grad_norm": 10.91303076089921, |
|
"learning_rate": 3.691012986988936e-07, |
|
"logits/chosen": -0.14848558604717255, |
|
"logits/rejected": -0.0716543048620224, |
|
"logps/chosen": -499.07293701171875, |
|
"logps/rejected": -508.61669921875, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6473081707954407, |
|
"rewards/margins": 0.08113422244787216, |
|
"rewards/rejected": -0.7284424304962158, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.41348060039648826, |
|
"grad_norm": 10.638718740555106, |
|
"learning_rate": 3.647297726051641e-07, |
|
"logits/chosen": -0.16046440601348877, |
|
"logits/rejected": -0.1986566036939621, |
|
"logps/chosen": -564.6318359375, |
|
"logps/rejected": -551.3790283203125, |
|
"loss": 0.6292, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7726220488548279, |
|
"rewards/margins": 0.1552344411611557, |
|
"rewards/rejected": -0.9278565645217896, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.41914471821013877, |
|
"grad_norm": 9.82408592787301, |
|
"learning_rate": 3.6031334502508524e-07, |
|
"logits/chosen": -0.22338561713695526, |
|
"logits/rejected": -0.2281237542629242, |
|
"logps/chosen": -592.9271240234375, |
|
"logps/rejected": -604.35009765625, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.7858235239982605, |
|
"rewards/margins": 0.24623067677021027, |
|
"rewards/rejected": -1.0320541858673096, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.42480883602378927, |
|
"grad_norm": 11.946249923246347, |
|
"learning_rate": 3.558537444041879e-07, |
|
"logits/chosen": -0.35307231545448303, |
|
"logits/rejected": -0.28875821828842163, |
|
"logps/chosen": -557.7034912109375, |
|
"logps/rejected": -546.9818725585938, |
|
"loss": 0.6343, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8830374479293823, |
|
"rewards/margins": 0.13727910816669464, |
|
"rewards/rejected": -1.020316481590271, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.43047295383743983, |
|
"grad_norm": 12.375783232448724, |
|
"learning_rate": 3.513527160845209e-07, |
|
"logits/chosen": -0.17869731783866882, |
|
"logits/rejected": -0.2512727975845337, |
|
"logps/chosen": -587.8796997070312, |
|
"logps/rejected": -617.7659912109375, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8949899673461914, |
|
"rewards/margins": 0.2545369267463684, |
|
"rewards/rejected": -1.1495269536972046, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.43613707165109034, |
|
"grad_norm": 9.798605023058345, |
|
"learning_rate": 3.4681202162158173e-07, |
|
"logits/chosen": -0.07858623564243317, |
|
"logits/rejected": -0.08539044857025146, |
|
"logps/chosen": -487.92791748046875, |
|
"logps/rejected": -499.284912109375, |
|
"loss": 0.6411, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9030693173408508, |
|
"rewards/margins": 0.18724389374256134, |
|
"rewards/rejected": -1.090313196182251, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.44180118946474084, |
|
"grad_norm": 12.27412569981436, |
|
"learning_rate": 3.4223343809490103e-07, |
|
"logits/chosen": -0.052330244332551956, |
|
"logits/rejected": -0.11102048307657242, |
|
"logps/chosen": -478.4798889160156, |
|
"logps/rejected": -495.06396484375, |
|
"loss": 0.635, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7351385951042175, |
|
"rewards/margins": 0.2151581346988678, |
|
"rewards/rejected": -0.9502967000007629, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4474653072783914, |
|
"grad_norm": 12.52089221779439, |
|
"learning_rate": 3.3761875741255155e-07, |
|
"logits/chosen": -0.09298163652420044, |
|
"logits/rejected": -0.08369234949350357, |
|
"logps/chosen": -604.6654663085938, |
|
"logps/rejected": -636.544677734375, |
|
"loss": 0.6409, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -1.117897868156433, |
|
"rewards/margins": 0.055749792605638504, |
|
"rewards/rejected": -1.1736476421356201, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4531294250920419, |
|
"grad_norm": 12.055368493983458, |
|
"learning_rate": 3.32969785609854e-07, |
|
"logits/chosen": -0.14993992447853088, |
|
"logits/rejected": -0.1439143717288971, |
|
"logps/chosen": -581.8445434570312, |
|
"logps/rejected": -543.5735473632812, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0062134265899658, |
|
"rewards/margins": 0.28742164373397827, |
|
"rewards/rejected": -1.2936350107192993, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.45879354290569246, |
|
"grad_norm": 12.482925557658405, |
|
"learning_rate": 3.2828834214255396e-07, |
|
"logits/chosen": -0.12979525327682495, |
|
"logits/rejected": -0.08011293411254883, |
|
"logps/chosen": -733.0359497070312, |
|
"logps/rejected": -733.6185302734375, |
|
"loss": 0.6143, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.321715235710144, |
|
"rewards/margins": 0.10633653402328491, |
|
"rewards/rejected": -1.4280518293380737, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.46445766071934297, |
|
"grad_norm": 11.273282654052506, |
|
"learning_rate": 3.235762591747458e-07, |
|
"logits/chosen": -0.16923761367797852, |
|
"logits/rejected": -0.161187082529068, |
|
"logps/chosen": -494.4414978027344, |
|
"logps/rejected": -488.517578125, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0632091760635376, |
|
"rewards/margins": 0.09481721371412277, |
|
"rewards/rejected": -1.1580263376235962, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4701217785329935, |
|
"grad_norm": 12.585012235836388, |
|
"learning_rate": 3.188353808618241e-07, |
|
"logits/chosen": -0.09800489246845245, |
|
"logits/rejected": -0.09487877786159515, |
|
"logps/chosen": -541.4532470703125, |
|
"logps/rejected": -552.1514892578125, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.165722131729126, |
|
"rewards/margins": 0.1106475368142128, |
|
"rewards/rejected": -1.2763696908950806, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.47578589634664403, |
|
"grad_norm": 12.896022597342322, |
|
"learning_rate": 3.1406756262874097e-07, |
|
"logits/chosen": -0.12023751437664032, |
|
"logits/rejected": -0.19186559319496155, |
|
"logps/chosen": -506.29705810546875, |
|
"logps/rejected": -528.5136108398438, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0864802598953247, |
|
"rewards/margins": 0.25540152192115784, |
|
"rewards/rejected": -1.3418817520141602, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.48145001416029454, |
|
"grad_norm": 14.37685541871016, |
|
"learning_rate": 3.0927467044385364e-07, |
|
"logits/chosen": -0.1970689296722412, |
|
"logits/rejected": -0.20069988071918488, |
|
"logps/chosen": -616.935791015625, |
|
"logps/rejected": -660.5993041992188, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.5820796489715576, |
|
"rewards/margins": 0.2973397374153137, |
|
"rewards/rejected": -1.8794193267822266, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.48711413197394504, |
|
"grad_norm": 13.003776590794605, |
|
"learning_rate": 3.044585800886452e-07, |
|
"logits/chosen": -0.2083606719970703, |
|
"logits/rejected": -0.21461403369903564, |
|
"logps/chosen": -601.05859375, |
|
"logps/rejected": -628.2822265625, |
|
"loss": 0.6325, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.4588840007781982, |
|
"rewards/margins": 0.1004204973578453, |
|
"rewards/rejected": -1.5593047142028809, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4927782497875956, |
|
"grad_norm": 15.852933713937096, |
|
"learning_rate": 2.996211764236051e-07, |
|
"logits/chosen": -0.198094442486763, |
|
"logits/rejected": -0.15960830450057983, |
|
"logps/chosen": -638.4063110351562, |
|
"logps/rejected": -647.378173828125, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6155242919921875, |
|
"rewards/margins": 0.23866060376167297, |
|
"rewards/rejected": -1.8541847467422485, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4984423676012461, |
|
"grad_norm": 12.409431772391303, |
|
"learning_rate": 2.947643526505562e-07, |
|
"logits/chosen": -0.2549227476119995, |
|
"logits/rejected": -0.2773335576057434, |
|
"logps/chosen": -583.142578125, |
|
"logps/rejected": -609.6130981445312, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.3404490947723389, |
|
"rewards/margins": 0.16823282837867737, |
|
"rewards/rejected": -1.5086817741394043, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.5041064854148967, |
|
"grad_norm": 11.633974731071602, |
|
"learning_rate": 2.8989000957171727e-07, |
|
"logits/chosen": -0.10305686295032501, |
|
"logits/rejected": -0.1576327383518219, |
|
"logps/chosen": -641.8757934570312, |
|
"logps/rejected": -665.0620727539062, |
|
"loss": 0.5978, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3931224346160889, |
|
"rewards/margins": 0.32287827134132385, |
|
"rewards/rejected": -1.7160007953643799, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.5097706032285472, |
|
"grad_norm": 16.050034972192993, |
|
"learning_rate": 2.850000548457917e-07, |
|
"logits/chosen": 0.14430885016918182, |
|
"logits/rejected": 0.1290302276611328, |
|
"logps/chosen": -486.8666076660156, |
|
"logps/rejected": -488.3501892089844, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0954620838165283, |
|
"rewards/margins": 0.26246118545532227, |
|
"rewards/rejected": -1.3579232692718506, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5154347210421977, |
|
"grad_norm": 19.840618375098327, |
|
"learning_rate": 2.8009640224137114e-07, |
|
"logits/chosen": -0.22643284499645233, |
|
"logits/rejected": -0.25430920720100403, |
|
"logps/chosen": -601.1445922851562, |
|
"logps/rejected": -614.4564208984375, |
|
"loss": 0.6495, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.603725790977478, |
|
"rewards/margins": 0.3380531966686249, |
|
"rewards/rejected": -1.9417788982391357, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.5210988388558482, |
|
"grad_norm": 17.51073505222395, |
|
"learning_rate": 2.751809708879502e-07, |
|
"logits/chosen": -0.21161291003227234, |
|
"logits/rejected": -0.17318451404571533, |
|
"logps/chosen": -563.7437133789062, |
|
"logps/rejected": -568.7862548828125, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.2936795949935913, |
|
"rewards/margins": 0.2570227384567261, |
|
"rewards/rejected": -1.550702452659607, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5267629566694987, |
|
"grad_norm": 12.714429256653643, |
|
"learning_rate": 2.7025568452484067e-07, |
|
"logits/chosen": -0.047567375004291534, |
|
"logits/rejected": -0.04497741162776947, |
|
"logps/chosen": -527.0328979492188, |
|
"logps/rejected": -553.7757568359375, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.0018929243087769, |
|
"rewards/margins": 0.2291610985994339, |
|
"rewards/rejected": -1.2310539484024048, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5324270744831493, |
|
"grad_norm": 15.205234986430579, |
|
"learning_rate": 2.653224707482835e-07, |
|
"logits/chosen": -0.07980841398239136, |
|
"logits/rejected": -0.024791846051812172, |
|
"logps/chosen": -734.6407470703125, |
|
"logps/rejected": -734.0439453125, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6005531549453735, |
|
"rewards/margins": 0.21221895515918732, |
|
"rewards/rejected": -1.8127720355987549, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.5380911922967998, |
|
"grad_norm": 13.78792024492432, |
|
"learning_rate": 2.603832602570505e-07, |
|
"logits/chosen": -0.19060659408569336, |
|
"logits/rejected": -0.08821268379688263, |
|
"logps/chosen": -565.0804443359375, |
|
"logps/rejected": -584.7576293945312, |
|
"loss": 0.6236, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.112985372543335, |
|
"rewards/margins": 0.2864472568035126, |
|
"rewards/rejected": -1.39943265914917, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5437553101104503, |
|
"grad_norm": 14.265593088211887, |
|
"learning_rate": 2.554399860968316e-07, |
|
"logits/chosen": -0.0522037036716938, |
|
"logits/rejected": -0.00027574002160690725, |
|
"logps/chosen": -639.2886962890625, |
|
"logps/rejected": -651.7282104492188, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.2711105346679688, |
|
"rewards/margins": 0.2477395236492157, |
|
"rewards/rejected": -1.5188500881195068, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5494194279241008, |
|
"grad_norm": 11.543996534625997, |
|
"learning_rate": 2.504945829037042e-07, |
|
"logits/chosen": -0.1183534637093544, |
|
"logits/rejected": -0.1250295788049698, |
|
"logps/chosen": -552.4560546875, |
|
"logps/rejected": -575.94775390625, |
|
"loss": 0.5996, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3370610475540161, |
|
"rewards/margins": 0.22345292568206787, |
|
"rewards/rejected": -1.5605138540267944, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5550835457377513, |
|
"grad_norm": 14.687067318658848, |
|
"learning_rate": 2.4554898614697943e-07, |
|
"logits/chosen": -0.09519994258880615, |
|
"logits/rejected": 0.008313467726111412, |
|
"logps/chosen": -533.7589111328125, |
|
"logps/rejected": -561.341064453125, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5500489473342896, |
|
"rewards/margins": 0.150363028049469, |
|
"rewards/rejected": -1.7004121541976929, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5607476635514018, |
|
"grad_norm": 14.406590393647319, |
|
"learning_rate": 2.406051313717232e-07, |
|
"logits/chosen": -0.15122143924236298, |
|
"logits/rejected": -0.18891175091266632, |
|
"logps/chosen": -624.895263671875, |
|
"logps/rejected": -634.48193359375, |
|
"loss": 0.6132, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.6897090673446655, |
|
"rewards/margins": 0.36004889011383057, |
|
"rewards/rejected": -2.049757957458496, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5664117813650524, |
|
"grad_norm": 15.706647427077176, |
|
"learning_rate": 2.3566495344124662e-07, |
|
"logits/chosen": -0.13275066018104553, |
|
"logits/rejected": -0.1235559806227684, |
|
"logps/chosen": -751.436279296875, |
|
"logps/rejected": -777.4478759765625, |
|
"loss": 0.6097, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.308816909790039, |
|
"rewards/margins": 0.3510530889034271, |
|
"rewards/rejected": -2.659869909286499, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5720758991787029, |
|
"grad_norm": 15.828215079444101, |
|
"learning_rate": 2.3073038577986357e-07, |
|
"logits/chosen": -0.07961639761924744, |
|
"logits/rejected": -0.16084156930446625, |
|
"logps/chosen": -487.7679138183594, |
|
"logps/rejected": -514.5160522460938, |
|
"loss": 0.613, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.5107285976409912, |
|
"rewards/margins": 0.1231117695569992, |
|
"rewards/rejected": -1.633840560913086, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5777400169923534, |
|
"grad_norm": 16.98606079743348, |
|
"learning_rate": 2.2580335961621235e-07, |
|
"logits/chosen": -0.029454564675688744, |
|
"logits/rejected": -0.06869341433048248, |
|
"logps/chosen": -622.4491577148438, |
|
"logps/rejected": -640.3287353515625, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6914516687393188, |
|
"rewards/margins": 0.5090458989143372, |
|
"rewards/rejected": -2.2004973888397217, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.583404134806004, |
|
"grad_norm": 17.188915741370625, |
|
"learning_rate": 2.20885803227435e-07, |
|
"logits/chosen": -0.11491873115301132, |
|
"logits/rejected": -0.11692114919424057, |
|
"logps/chosen": -667.051513671875, |
|
"logps/rejected": -692.4650268554688, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9581496715545654, |
|
"rewards/margins": 0.2536167800426483, |
|
"rewards/rejected": -2.2117667198181152, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5890682526196545, |
|
"grad_norm": 18.64862832145123, |
|
"learning_rate": 2.159796411845128e-07, |
|
"logits/chosen": -0.2661534249782562, |
|
"logits/rejected": -0.26083916425704956, |
|
"logps/chosen": -598.6723022460938, |
|
"logps/rejected": -628.9426879882812, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5122573375701904, |
|
"rewards/margins": 0.391846239566803, |
|
"rewards/rejected": -1.9041036367416382, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.594732370433305, |
|
"grad_norm": 34.61759080017131, |
|
"learning_rate": 2.110867935990524e-07, |
|
"logits/chosen": -0.14623607695102692, |
|
"logits/rejected": -0.08916531503200531, |
|
"logps/chosen": -743.6148681640625, |
|
"logps/rejected": -772.8529052734375, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8820927143096924, |
|
"rewards/margins": 0.48489370942115784, |
|
"rewards/rejected": -2.366986036300659, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6003964882469556, |
|
"grad_norm": 15.508947196938045, |
|
"learning_rate": 2.0620917537181646e-07, |
|
"logits/chosen": -0.10365153849124908, |
|
"logits/rejected": -0.0691133588552475, |
|
"logps/chosen": -673.80224609375, |
|
"logps/rejected": -674.360595703125, |
|
"loss": 0.6345, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8425861597061157, |
|
"rewards/margins": 0.2413545399904251, |
|
"rewards/rejected": -2.0839409828186035, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.6060606060606061, |
|
"grad_norm": 16.329701704182607, |
|
"learning_rate": 2.013486954432943e-07, |
|
"logits/chosen": -0.10045067965984344, |
|
"logits/rejected": -0.051240742206573486, |
|
"logps/chosen": -639.800537109375, |
|
"logps/rejected": -659.7952880859375, |
|
"loss": 0.6148, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.61115300655365, |
|
"rewards/margins": 0.4051777720451355, |
|
"rewards/rejected": -2.0163307189941406, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.6117247238742566, |
|
"grad_norm": 19.231855038307156, |
|
"learning_rate": 1.9650725604660473e-07, |
|
"logits/chosen": -0.10707108676433563, |
|
"logits/rejected": -0.11871937662363052, |
|
"logps/chosen": -561.498291015625, |
|
"logps/rejected": -613.8865966796875, |
|
"loss": 0.5976, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4580614566802979, |
|
"rewards/margins": 0.1603337824344635, |
|
"rewards/rejected": -1.618395209312439, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.6173888416879071, |
|
"grad_norm": 14.596940988757039, |
|
"learning_rate": 1.9168675196302411e-07, |
|
"logits/chosen": -0.080962173640728, |
|
"logits/rejected": -0.1715216189622879, |
|
"logps/chosen": -638.506591796875, |
|
"logps/rejected": -675.8438720703125, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5369634628295898, |
|
"rewards/margins": 0.35168346762657166, |
|
"rewards/rejected": -1.8886468410491943, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.6230529595015576, |
|
"grad_norm": 15.698411262500759, |
|
"learning_rate": 1.8688906978043097e-07, |
|
"logits/chosen": -0.08788567781448364, |
|
"logits/rejected": -0.09263203293085098, |
|
"logps/chosen": -656.8270874023438, |
|
"logps/rejected": -675.0738525390625, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9104950428009033, |
|
"rewards/margins": 0.25849801301956177, |
|
"rewards/rejected": -2.168992757797241, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6287170773152082, |
|
"grad_norm": 11.248760224035648, |
|
"learning_rate": 1.8211608715495725e-07, |
|
"logits/chosen": -0.20944643020629883, |
|
"logits/rejected": -0.21442022919654846, |
|
"logps/chosen": -639.87109375, |
|
"logps/rejected": -680.1671142578125, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.946523666381836, |
|
"rewards/margins": 0.565606951713562, |
|
"rewards/rejected": -2.5121307373046875, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.6343811951288587, |
|
"grad_norm": 23.371346801612088, |
|
"learning_rate": 1.7736967207613456e-07, |
|
"logits/chosen": -0.1694624125957489, |
|
"logits/rejected": -0.20123568177223206, |
|
"logps/chosen": -699.4913330078125, |
|
"logps/rejected": -709.8638916015625, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.20566987991333, |
|
"rewards/margins": 0.46030035614967346, |
|
"rewards/rejected": -2.6659703254699707, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6400453129425092, |
|
"grad_norm": 14.972751738612004, |
|
"learning_rate": 1.7265168213582442e-07, |
|
"logits/chosen": -0.17189843952655792, |
|
"logits/rejected": -0.051060281693935394, |
|
"logps/chosen": -745.7001953125, |
|
"logps/rejected": -743.2514038085938, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.1485393047332764, |
|
"rewards/margins": 0.38365238904953003, |
|
"rewards/rejected": -2.532191753387451, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.6457094307561597, |
|
"grad_norm": 15.350450735594052, |
|
"learning_rate": 1.679639638012175e-07, |
|
"logits/chosen": -0.2607277035713196, |
|
"logits/rejected": -0.25059252977371216, |
|
"logps/chosen": -670.5557861328125, |
|
"logps/rejected": -712.263916015625, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.157758951187134, |
|
"rewards/margins": 0.6318100094795227, |
|
"rewards/rejected": -2.7895689010620117, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6513735485698102, |
|
"grad_norm": 18.101523725425658, |
|
"learning_rate": 1.6330835169218643e-07, |
|
"logits/chosen": -0.17243096232414246, |
|
"logits/rejected": -0.12170116603374481, |
|
"logps/chosen": -694.1895141601562, |
|
"logps/rejected": -697.4880981445312, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.00701642036438, |
|
"rewards/margins": 0.3331385552883148, |
|
"rewards/rejected": -2.3401551246643066, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6570376663834607, |
|
"grad_norm": 18.372994873462016, |
|
"learning_rate": 1.5868666786327576e-07, |
|
"logits/chosen": -0.21193864941596985, |
|
"logits/rejected": -0.2452685534954071, |
|
"logps/chosen": -725.9470825195312, |
|
"logps/rejected": -772.846923828125, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.013594627380371, |
|
"rewards/margins": 0.4714154303073883, |
|
"rewards/rejected": -2.4850101470947266, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6627017841971113, |
|
"grad_norm": 21.510900198503908, |
|
"learning_rate": 1.5410072109060908e-07, |
|
"logits/chosen": -0.2692334055900574, |
|
"logits/rejected": -0.20904116332530975, |
|
"logps/chosen": -731.9447631835938, |
|
"logps/rejected": -718.5181884765625, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.5587196350097656, |
|
"rewards/margins": 0.21226339042186737, |
|
"rewards/rejected": -2.7709832191467285, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6683659020107618, |
|
"grad_norm": 18.927344667610054, |
|
"learning_rate": 1.4955230616399316e-07, |
|
"logits/chosen": -0.3498212993144989, |
|
"logits/rejected": -0.33364471793174744, |
|
"logps/chosen": -781.1145629882812, |
|
"logps/rejected": -797.6188354492188, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.542229175567627, |
|
"rewards/margins": 0.2951570153236389, |
|
"rewards/rejected": -2.837385892868042, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6740300198244124, |
|
"grad_norm": 14.255891622779398, |
|
"learning_rate": 1.450432031844959e-07, |
|
"logits/chosen": -0.23423922061920166, |
|
"logits/rejected": -0.25003939867019653, |
|
"logps/chosen": -525.1193237304688, |
|
"logps/rejected": -553.8635864257812, |
|
"loss": 0.5995, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4713220596313477, |
|
"rewards/margins": 0.4264037013053894, |
|
"rewards/rejected": -1.8977254629135132, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6796941376380629, |
|
"grad_norm": 33.28066042950332, |
|
"learning_rate": 1.405751768677732e-07, |
|
"logits/chosen": -0.15600809454917908, |
|
"logits/rejected": -0.1329360455274582, |
|
"logps/chosen": -642.7808837890625, |
|
"logps/rejected": -663.9035034179688, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7750027179718018, |
|
"rewards/margins": 0.4877189099788666, |
|
"rewards/rejected": -2.262721300125122, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6853582554517134, |
|
"grad_norm": 16.003547501120885, |
|
"learning_rate": 1.3614997585341592e-07, |
|
"logits/chosen": -0.08609099686145782, |
|
"logits/rejected": -0.06985644996166229, |
|
"logps/chosen": -608.2687377929688, |
|
"logps/rejected": -635.8214721679688, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.749746561050415, |
|
"rewards/margins": 0.35603052377700806, |
|
"rewards/rejected": -2.1057772636413574, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6910223732653639, |
|
"grad_norm": 15.622558143952144, |
|
"learning_rate": 1.3176933202059066e-07, |
|
"logits/chosen": -0.03839196264743805, |
|
"logits/rejected": -0.09653138369321823, |
|
"logps/chosen": -697.4569702148438, |
|
"logps/rejected": -703.6799926757812, |
|
"loss": 0.5842, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8517509698867798, |
|
"rewards/margins": 0.5271908640861511, |
|
"rewards/rejected": -2.378941774368286, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6966864910790145, |
|
"grad_norm": 25.321745533517717, |
|
"learning_rate": 1.2743495981023782e-07, |
|
"logits/chosen": -0.1990116387605667, |
|
"logits/rejected": -0.20654082298278809, |
|
"logps/chosen": -585.44580078125, |
|
"logps/rejected": -625.1630859375, |
|
"loss": 0.6205, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6379293203353882, |
|
"rewards/margins": 0.3536279797554016, |
|
"rewards/rejected": -1.9915573596954346, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.702350608892665, |
|
"grad_norm": 17.503280710653502, |
|
"learning_rate": 1.2314855555409628e-07, |
|
"logits/chosen": 0.002707863226532936, |
|
"logits/rejected": -0.1227339655160904, |
|
"logps/chosen": -765.3572998046875, |
|
"logps/rejected": -813.5208129882812, |
|
"loss": 0.5777, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.487704038619995, |
|
"rewards/margins": 0.45925870537757874, |
|
"rewards/rejected": -2.946962594985962, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.7080147267063155, |
|
"grad_norm": 18.496058504632952, |
|
"learning_rate": 1.1891179681081375e-07, |
|
"logits/chosen": -0.29699820280075073, |
|
"logits/rejected": -0.21896013617515564, |
|
"logps/chosen": -615.5306396484375, |
|
"logps/rejected": -633.4918212890625, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.971556305885315, |
|
"rewards/margins": 0.4856339991092682, |
|
"rewards/rejected": -2.4571902751922607, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.713678844519966, |
|
"grad_norm": 15.706262251859547, |
|
"learning_rate": 1.1472634170940554e-07, |
|
"logits/chosen": -0.13945288956165314, |
|
"logits/rejected": -0.12554016709327698, |
|
"logps/chosen": -695.7122802734375, |
|
"logps/rejected": -725.1580810546875, |
|
"loss": 0.5713, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.980285882949829, |
|
"rewards/margins": 0.4433578848838806, |
|
"rewards/rejected": -2.4236435890197754, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.7193429623336165, |
|
"grad_norm": 23.487633259948737, |
|
"learning_rate": 1.1059382830031699e-07, |
|
"logits/chosen": -0.06917256116867065, |
|
"logits/rejected": -0.1285679042339325, |
|
"logps/chosen": -598.0589599609375, |
|
"logps/rejected": -606.0611572265625, |
|
"loss": 0.6216, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.9320385456085205, |
|
"rewards/margins": 0.06984461843967438, |
|
"rewards/rejected": -2.001883029937744, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.7250070801472671, |
|
"grad_norm": 30.32460919379098, |
|
"learning_rate": 1.0651587391434364e-07, |
|
"logits/chosen": -0.01632758043706417, |
|
"logits/rejected": 0.0047612241469323635, |
|
"logps/chosen": -612.826171875, |
|
"logps/rejected": -657.8508911132812, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.194361448287964, |
|
"rewards/margins": 0.47857794165611267, |
|
"rewards/rejected": -2.6729393005371094, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.7306711979609176, |
|
"grad_norm": 17.40955248228261, |
|
"learning_rate": 1.0249407452966156e-07, |
|
"logits/chosen": -0.11788008362054825, |
|
"logits/rejected": -0.14579714834690094, |
|
"logps/chosen": -577.5438232421875, |
|
"logps/rejected": -614.2926025390625, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.6694962978363037, |
|
"rewards/margins": 0.33409008383750916, |
|
"rewards/rejected": -2.0035862922668457, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.7363353157745681, |
|
"grad_norm": 18.249339537864724, |
|
"learning_rate": 9.853000414721278e-08, |
|
"logits/chosen": -0.14815405011177063, |
|
"logits/rejected": -0.16669398546218872, |
|
"logps/chosen": -727.6856689453125, |
|
"logps/rejected": -736.8724975585938, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.2588837146759033, |
|
"rewards/margins": 0.05782442167401314, |
|
"rewards/rejected": -2.3167080879211426, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7419994335882186, |
|
"grad_norm": 19.72793699039372, |
|
"learning_rate": 9.462521417469318e-08, |
|
"logits/chosen": -0.18796461820602417, |
|
"logits/rejected": -0.14377792179584503, |
|
"logps/chosen": -631.5921020507812, |
|
"logps/rejected": -615.8783569335938, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -1.9607959985733032, |
|
"rewards/margins": 0.09862452745437622, |
|
"rewards/rejected": -2.059420347213745, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7476635514018691, |
|
"grad_norm": 15.97453620320132, |
|
"learning_rate": 9.078123281938208e-08, |
|
"logits/chosen": -0.02748515084385872, |
|
"logits/rejected": -0.026533063501119614, |
|
"logps/chosen": -657.2438354492188, |
|
"logps/rejected": -698.208984375, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9977624416351318, |
|
"rewards/margins": 0.47537675499916077, |
|
"rewards/rejected": -2.4731392860412598, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.7533276692155196, |
|
"grad_norm": 18.344545537200407, |
|
"learning_rate": 8.699956449005178e-08, |
|
"logits/chosen": -0.09640650451183319, |
|
"logits/rejected": -0.1486753523349762, |
|
"logps/chosen": -665.8047485351562, |
|
"logps/rejected": -741.2623291015625, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8255144357681274, |
|
"rewards/margins": 0.7320832014083862, |
|
"rewards/rejected": -2.5575976371765137, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7589917870291703, |
|
"grad_norm": 18.92039821837182, |
|
"learning_rate": 8.328168920819112e-08, |
|
"logits/chosen": -0.14237159490585327, |
|
"logits/rejected": -0.09421875327825546, |
|
"logps/chosen": -598.2032470703125, |
|
"logps/rejected": -603.7655639648438, |
|
"loss": 0.5898, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.6649417877197266, |
|
"rewards/margins": 0.15654519200325012, |
|
"rewards/rejected": -1.8214870691299438, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7646559048428208, |
|
"grad_norm": 18.64061644797886, |
|
"learning_rate": 7.962906202877345e-08, |
|
"logits/chosen": -0.2773872911930084, |
|
"logits/rejected": -0.33335018157958984, |
|
"logps/chosen": -691.8497314453125, |
|
"logps/rejected": -737.4361572265625, |
|
"loss": 0.6093, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9401991367340088, |
|
"rewards/margins": 0.4151093363761902, |
|
"rewards/rejected": -2.3553085327148438, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7703200226564713, |
|
"grad_norm": 21.726379131857296, |
|
"learning_rate": 7.604311247079553e-08, |
|
"logits/chosen": 0.04578697308897972, |
|
"logits/rejected": 0.061678219586610794, |
|
"logps/chosen": -688.2077026367188, |
|
"logps/rejected": -728.4756469726562, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.2095139026641846, |
|
"rewards/margins": 0.3764384388923645, |
|
"rewards/rejected": -2.5859522819519043, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7759841404701218, |
|
"grad_norm": 17.40731293276627, |
|
"learning_rate": 7.252524395781051e-08, |
|
"logits/chosen": -0.07461674511432648, |
|
"logits/rejected": -0.13030676543712616, |
|
"logps/chosen": -713.3646240234375, |
|
"logps/rejected": -774.5071411132812, |
|
"loss": 0.6029, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1679880619049072, |
|
"rewards/margins": 0.5716091394424438, |
|
"rewards/rejected": -2.7395970821380615, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7816482582837723, |
|
"grad_norm": 21.79464945664344, |
|
"learning_rate": 6.907683326867397e-08, |
|
"logits/chosen": -0.12649454176425934, |
|
"logits/rejected": -0.1244879737496376, |
|
"logps/chosen": -536.6094970703125, |
|
"logps/rejected": -555.4559936523438, |
|
"loss": 0.5841, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5893771648406982, |
|
"rewards/margins": 0.261711984872818, |
|
"rewards/rejected": -1.8510891199111938, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7873123760974228, |
|
"grad_norm": 20.63309990944421, |
|
"learning_rate": 6.569922999871735e-08, |
|
"logits/chosen": -0.05570756644010544, |
|
"logits/rejected": -0.08853740990161896, |
|
"logps/chosen": -629.28515625, |
|
"logps/rejected": -664.021728515625, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2577807903289795, |
|
"rewards/margins": 0.6096078753471375, |
|
"rewards/rejected": -2.8673884868621826, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7929764939110734, |
|
"grad_norm": 17.52906274578837, |
|
"learning_rate": 6.239375603156042e-08, |
|
"logits/chosen": -0.22038058936595917, |
|
"logits/rejected": -0.16050884127616882, |
|
"logps/chosen": -776.4019165039062, |
|
"logps/rejected": -753.0433959960938, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1483802795410156, |
|
"rewards/margins": 0.5532088875770569, |
|
"rewards/rejected": -2.7015891075134277, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7986406117247239, |
|
"grad_norm": 16.192935277358306, |
|
"learning_rate": 5.916170502176937e-08, |
|
"logits/chosen": -0.171335369348526, |
|
"logits/rejected": -0.09694649279117584, |
|
"logps/chosen": -679.2623291015625, |
|
"logps/rejected": -678.6334228515625, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -2.268268585205078, |
|
"rewards/margins": 0.08156970143318176, |
|
"rewards/rejected": -2.3498384952545166, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.8043047295383744, |
|
"grad_norm": 14.541878638529214, |
|
"learning_rate": 5.6004341888562816e-08, |
|
"logits/chosen": -0.13256961107254028, |
|
"logits/rejected": -0.14669382572174072, |
|
"logps/chosen": -630.8463745117188, |
|
"logps/rejected": -676.73779296875, |
|
"loss": 0.6054, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.901886224746704, |
|
"rewards/margins": 0.3245258331298828, |
|
"rewards/rejected": -2.226411819458008, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.8099688473520249, |
|
"grad_norm": 22.202056701225032, |
|
"learning_rate": 5.2922902320763296e-08, |
|
"logits/chosen": -0.11466534435749054, |
|
"logits/rejected": -0.17642728984355927, |
|
"logps/chosen": -626.3448486328125, |
|
"logps/rejected": -645.9719848632812, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.7750495672225952, |
|
"rewards/margins": 0.3986364006996155, |
|
"rewards/rejected": -2.1736862659454346, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.8156329651656754, |
|
"grad_norm": 18.29898787871736, |
|
"learning_rate": 4.9918592293189206e-08, |
|
"logits/chosen": -0.23186799883842468, |
|
"logits/rejected": -0.20798341929912567, |
|
"logps/chosen": -752.2689819335938, |
|
"logps/rejected": -765.903564453125, |
|
"loss": 0.5843, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.287684440612793, |
|
"rewards/margins": 0.38349679112434387, |
|
"rewards/rejected": -2.6711812019348145, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.821297082979326, |
|
"grad_norm": 23.001989193030713, |
|
"learning_rate": 4.6992587594675806e-08, |
|
"logits/chosen": -0.15347278118133545, |
|
"logits/rejected": -0.16635027527809143, |
|
"logps/chosen": -672.3074951171875, |
|
"logps/rejected": -702.2915649414062, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0705018043518066, |
|
"rewards/margins": 0.5524295568466187, |
|
"rewards/rejected": -2.622931480407715, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.8269612007929765, |
|
"grad_norm": 14.552405766755014, |
|
"learning_rate": 4.414603336790959e-08, |
|
"logits/chosen": -0.29983657598495483, |
|
"logits/rejected": -0.28856927156448364, |
|
"logps/chosen": -660.117919921875, |
|
"logps/rejected": -722.7196655273438, |
|
"loss": 0.5645, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8668142557144165, |
|
"rewards/margins": 0.7347536683082581, |
|
"rewards/rejected": -2.6015677452087402, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.832625318606627, |
|
"grad_norm": 19.99667624232941, |
|
"learning_rate": 4.1380043661257024e-08, |
|
"logits/chosen": 0.08385223895311356, |
|
"logits/rejected": 0.006908579729497433, |
|
"logps/chosen": -622.146240234375, |
|
"logps/rejected": -680.0650634765625, |
|
"loss": 0.5934, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9830541610717773, |
|
"rewards/margins": 0.4676002562046051, |
|
"rewards/rejected": -2.4506545066833496, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.8382894364202775, |
|
"grad_norm": 16.875286112617857, |
|
"learning_rate": 3.86957009927624e-08, |
|
"logits/chosen": -0.03332146629691124, |
|
"logits/rejected": -0.046583205461502075, |
|
"logps/chosen": -722.2545166015625, |
|
"logps/rejected": -746.12060546875, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.3616933822631836, |
|
"rewards/margins": 0.3179056942462921, |
|
"rewards/rejected": -2.6795990467071533, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.843953554233928, |
|
"grad_norm": 32.05017847240802, |
|
"learning_rate": 3.609405592648543e-08, |
|
"logits/chosen": -0.001988898264244199, |
|
"logits/rejected": -0.06228378415107727, |
|
"logps/chosen": -650.3690185546875, |
|
"logps/rejected": -709.6563720703125, |
|
"loss": 0.5915, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.5558290481567383, |
|
"rewards/margins": 0.3824766278266907, |
|
"rewards/rejected": -2.938305616378784, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.8496176720475785, |
|
"grad_norm": 15.255726599915509, |
|
"learning_rate": 3.357612666134496e-08, |
|
"logits/chosen": -0.03047587350010872, |
|
"logits/rejected": -0.05225413292646408, |
|
"logps/chosen": -561.4085693359375, |
|
"logps/rejected": -593.854248046875, |
|
"loss": 0.5601, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7476489543914795, |
|
"rewards/margins": 0.5453365445137024, |
|
"rewards/rejected": -2.292985439300537, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8552817898612292, |
|
"grad_norm": 18.890827754495124, |
|
"learning_rate": 3.1142898632629285e-08, |
|
"logits/chosen": -0.18943175673484802, |
|
"logits/rejected": -0.31268787384033203, |
|
"logps/chosen": -611.166259765625, |
|
"logps/rejected": -627.4059448242188, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0698297023773193, |
|
"rewards/margins": 0.39990168809890747, |
|
"rewards/rejected": -2.469731330871582, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.8609459076748797, |
|
"grad_norm": 28.452391944889012, |
|
"learning_rate": 2.8795324126328596e-08, |
|
"logits/chosen": -0.16797076165676117, |
|
"logits/rejected": -0.18733422458171844, |
|
"logps/chosen": -828.4105224609375, |
|
"logps/rejected": -889.1057739257812, |
|
"loss": 0.6415, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.8144371509552, |
|
"rewards/margins": 0.8011550903320312, |
|
"rewards/rejected": -3.6155917644500732, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8666100254885302, |
|
"grad_norm": 17.872239456429195, |
|
"learning_rate": 2.653432190644156e-08, |
|
"logits/chosen": -0.14598588645458221, |
|
"logits/rejected": -0.1877167820930481, |
|
"logps/chosen": -615.3109130859375, |
|
"logps/rejected": -689.0770874023438, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.064265727996826, |
|
"rewards/margins": 0.5991795659065247, |
|
"rewards/rejected": -2.663445472717285, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8722741433021807, |
|
"grad_norm": 16.92421864322488, |
|
"learning_rate": 2.4360776855401084e-08, |
|
"logits/chosen": -0.24489791691303253, |
|
"logits/rejected": -0.28221431374549866, |
|
"logps/chosen": -726.8218994140625, |
|
"logps/rejected": -759.2882080078125, |
|
"loss": 0.5991, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.599058151245117, |
|
"rewards/margins": 0.23930224776268005, |
|
"rewards/rejected": -2.838360548019409, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8779382611158312, |
|
"grad_norm": 39.14715681223177, |
|
"learning_rate": 2.2275539627760214e-08, |
|
"logits/chosen": -0.3715534806251526, |
|
"logits/rejected": -0.3498302400112152, |
|
"logps/chosen": -844.0606689453125, |
|
"logps/rejected": -857.6305541992188, |
|
"loss": 0.6096, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.6733736991882324, |
|
"rewards/margins": 0.4572853147983551, |
|
"rewards/rejected": -3.1306586265563965, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8836023789294817, |
|
"grad_norm": 23.939306601700995, |
|
"learning_rate": 2.0279426317273835e-08, |
|
"logits/chosen": -0.3317481577396393, |
|
"logits/rejected": -0.2864743173122406, |
|
"logps/chosen": -599.5152587890625, |
|
"logps/rejected": -619.390869140625, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6302772760391235, |
|
"rewards/margins": 0.24534249305725098, |
|
"rewards/rejected": -1.875619649887085, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8892664967431323, |
|
"grad_norm": 17.542398459595375, |
|
"learning_rate": 1.8373218137506004e-08, |
|
"logits/chosen": 0.07265366613864899, |
|
"logits/rejected": 0.0759335532784462, |
|
"logps/chosen": -578.22802734375, |
|
"logps/rejected": -618.4689331054688, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.9392379522323608, |
|
"rewards/margins": 0.2717141807079315, |
|
"rewards/rejected": -2.210952043533325, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8949306145567828, |
|
"grad_norm": 23.722900999719876, |
|
"learning_rate": 1.6557661116088585e-08, |
|
"logits/chosen": -0.03199579566717148, |
|
"logits/rejected": -0.03713482245802879, |
|
"logps/chosen": -615.804443359375, |
|
"logps/rejected": -641.7705688476562, |
|
"loss": 0.6303, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0657472610473633, |
|
"rewards/margins": 0.09946224838495255, |
|
"rewards/rejected": -2.1652092933654785, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.9005947323704333, |
|
"grad_norm": 15.176785050154727, |
|
"learning_rate": 1.4833465802750383e-08, |
|
"logits/chosen": -0.12174008041620255, |
|
"logits/rejected": -0.16289708018302917, |
|
"logps/chosen": -617.8139038085938, |
|
"logps/rejected": -683.48193359375, |
|
"loss": 0.5725, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.916107416152954, |
|
"rewards/margins": 0.5993362665176392, |
|
"rewards/rejected": -2.515443801879883, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.9062588501840838, |
|
"grad_norm": 19.26980288418955, |
|
"learning_rate": 1.3201306991231259e-08, |
|
"logits/chosen": -0.20962023735046387, |
|
"logits/rejected": -0.25265225768089294, |
|
"logps/chosen": -677.2613525390625, |
|
"logps/rejected": -693.0345458984375, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8539336919784546, |
|
"rewards/margins": 0.4784063398838043, |
|
"rewards/rejected": -2.3323402404785156, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.9119229679977343, |
|
"grad_norm": 20.376813706730847, |
|
"learning_rate": 1.166182345518979e-08, |
|
"logits/chosen": -0.044456273317337036, |
|
"logits/rejected": -0.03623102977871895, |
|
"logps/chosen": -552.9375610351562, |
|
"logps/rejected": -566.2003784179688, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.7421352863311768, |
|
"rewards/margins": 0.16987690329551697, |
|
"rewards/rejected": -1.9120118618011475, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.9175870858113849, |
|
"grad_norm": 20.363736058976666, |
|
"learning_rate": 1.021561769820814e-08, |
|
"logits/chosen": -0.09859003871679306, |
|
"logits/rejected": -0.07184389978647232, |
|
"logps/chosen": -692.3455810546875, |
|
"logps/rejected": -704.27294921875, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0336098670959473, |
|
"rewards/margins": 0.43381887674331665, |
|
"rewards/rejected": -2.467428684234619, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.9232512036250354, |
|
"grad_norm": 23.0620695553384, |
|
"learning_rate": 8.86325571799193e-09, |
|
"logits/chosen": 0.0027242780197411776, |
|
"logits/rejected": -0.07530532777309418, |
|
"logps/chosen": -589.6121215820312, |
|
"logps/rejected": -641.5111694335938, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.1123204231262207, |
|
"rewards/margins": 0.17711400985717773, |
|
"rewards/rejected": -2.2894344329833984, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.9289153214386859, |
|
"grad_norm": 17.922993935961088, |
|
"learning_rate": 7.60526678485704e-09, |
|
"logits/chosen": -0.12710081040859222, |
|
"logits/rejected": -0.11238690465688705, |
|
"logps/chosen": -620.1683349609375, |
|
"logps/rejected": -702.4774780273438, |
|
"loss": 0.6412, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9170653820037842, |
|
"rewards/margins": 0.896059513092041, |
|
"rewards/rejected": -2.813124895095825, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.9345794392523364, |
|
"grad_norm": 14.970821420891314, |
|
"learning_rate": 6.4421432345906915e-09, |
|
"logits/chosen": -0.3055119216442108, |
|
"logits/rejected": -0.28563088178634644, |
|
"logps/chosen": -870.13818359375, |
|
"logps/rejected": -924.6994018554688, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.5290729999542236, |
|
"rewards/margins": 0.7610968351364136, |
|
"rewards/rejected": -3.2901699542999268, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.940243557065987, |
|
"grad_norm": 15.233194399042373, |
|
"learning_rate": 5.374340275767136e-09, |
|
"logits/chosen": -0.14172212779521942, |
|
"logits/rejected": -0.286018043756485, |
|
"logps/chosen": -591.59814453125, |
|
"logps/rejected": -662.8147583007812, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7677208185195923, |
|
"rewards/margins": 0.6580394506454468, |
|
"rewards/rejected": -2.425760269165039, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.9459076748796375, |
|
"grad_norm": 22.291131982136136, |
|
"learning_rate": 4.402275811593997e-09, |
|
"logits/chosen": -0.10256578773260117, |
|
"logits/rejected": 0.006013460457324982, |
|
"logps/chosen": -710.4451904296875, |
|
"logps/rejected": -692.7806396484375, |
|
"loss": 0.6044, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.21944522857666, |
|
"rewards/margins": 0.39656540751457214, |
|
"rewards/rejected": -2.6160104274749756, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.9515717926932881, |
|
"grad_norm": 23.960509854892667, |
|
"learning_rate": 3.5263302763585133e-09, |
|
"logits/chosen": -0.2649417519569397, |
|
"logits/rejected": -0.25996100902557373, |
|
"logps/chosen": -680.4586181640625, |
|
"logps/rejected": -733.28564453125, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0978713035583496, |
|
"rewards/margins": 0.47943735122680664, |
|
"rewards/rejected": -2.5773086547851562, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.9572359105069386, |
|
"grad_norm": 22.75318099495045, |
|
"learning_rate": 2.7468464865381124e-09, |
|
"logits/chosen": -0.13250204920768738, |
|
"logits/rejected": -0.12757354974746704, |
|
"logps/chosen": -799.2764892578125, |
|
"logps/rejected": -853.9093627929688, |
|
"loss": 0.5613, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.5642991065979004, |
|
"rewards/margins": 0.6402150392532349, |
|
"rewards/rejected": -3.204514265060425, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.9629000283205891, |
|
"grad_norm": 18.819415650946482, |
|
"learning_rate": 2.064129506633011e-09, |
|
"logits/chosen": -0.06593064218759537, |
|
"logits/rejected": -0.17728903889656067, |
|
"logps/chosen": -702.3214111328125, |
|
"logps/rejected": -763.5635986328125, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0367045402526855, |
|
"rewards/margins": 0.4802042841911316, |
|
"rewards/rejected": -2.516909122467041, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9685641461342396, |
|
"grad_norm": 22.185275104009445, |
|
"learning_rate": 1.4784465297741632e-09, |
|
"logits/chosen": -0.1805897057056427, |
|
"logits/rejected": -0.153991237282753, |
|
"logps/chosen": -840.6116333007812, |
|
"logps/rejected": -830.1062622070312, |
|
"loss": 0.6013, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6172409057617188, |
|
"rewards/margins": 0.34190982580184937, |
|
"rewards/rejected": -2.959150552749634, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9742282639478901, |
|
"grad_norm": 17.50705579397618, |
|
"learning_rate": 9.900267731524914e-10, |
|
"logits/chosen": -0.030229410156607628, |
|
"logits/rejected": -0.12910275161266327, |
|
"logps/chosen": -693.0966796875, |
|
"logps/rejected": -743.1204833984375, |
|
"loss": 0.5858, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.129164695739746, |
|
"rewards/margins": 0.5310593247413635, |
|
"rewards/rejected": -2.660223960876465, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9798923817615406, |
|
"grad_norm": 21.78741888106313, |
|
"learning_rate": 5.990613883107565e-10, |
|
"logits/chosen": -0.3164052367210388, |
|
"logits/rejected": -0.26196470856666565, |
|
"logps/chosen": -797.2408447265625, |
|
"logps/rejected": -829.5914306640625, |
|
"loss": 0.6084, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.587916612625122, |
|
"rewards/margins": 0.5189841389656067, |
|
"rewards/rejected": -3.106900691986084, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9855564995751912, |
|
"grad_norm": 26.170987265565298, |
|
"learning_rate": 3.0570338633312266e-10, |
|
"logits/chosen": -0.04128523916006088, |
|
"logits/rejected": -0.0052064331248402596, |
|
"logps/chosen": -754.0946655273438, |
|
"logps/rejected": -769.3212890625, |
|
"loss": 0.5983, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.7384681701660156, |
|
"rewards/margins": 0.2529481053352356, |
|
"rewards/rejected": -2.9914162158966064, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9912206173888417, |
|
"grad_norm": 17.716998560167845, |
|
"learning_rate": 1.1006757796153121e-10, |
|
"logits/chosen": -0.19896100461483002, |
|
"logits/rejected": -0.23088839650154114, |
|
"logps/chosen": -755.6218872070312, |
|
"logps/rejected": -767.3886108398438, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.123622417449951, |
|
"rewards/margins": 0.8624438047409058, |
|
"rewards/rejected": -2.9860663414001465, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9968847352024922, |
|
"grad_norm": 21.859141173839067, |
|
"learning_rate": 1.2230528662698913e-11, |
|
"logits/chosen": -0.1374054104089737, |
|
"logits/rejected": -0.1221609115600586, |
|
"logps/chosen": -711.783935546875, |
|
"logps/rejected": -736.2469482421875, |
|
"loss": 0.6195, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.2918009757995605, |
|
"rewards/margins": 0.32683807611465454, |
|
"rewards/rejected": -2.6186389923095703, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9997167941093175, |
|
"step": 1765, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6297533516167919, |
|
"train_runtime": 10521.5341, |
|
"train_samples_per_second": 5.369, |
|
"train_steps_per_second": 0.168 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1765, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|