|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010468463752944255, |
|
"grad_norm": 1.1945625860018705, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.5192830562591553, |
|
"logits/rejected": -2.3547825813293457, |
|
"logps/chosen": -297.60443115234375, |
|
"logps/rejected": -252.4619903564453, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.00020415784092620015, |
|
"rewards/margins": -0.0002505290030967444, |
|
"rewards/rejected": 4.637122037820518e-05, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 1.1009278086693854, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -2.2455766201019287, |
|
"logits/rejected": -2.215245008468628, |
|
"logps/chosen": -275.6755065917969, |
|
"logps/rejected": -254.76722717285156, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.6111111044883728, |
|
"rewards/chosen": 0.004448441788554192, |
|
"rewards/margins": 0.0008290203404612839, |
|
"rewards/rejected": 0.0036194208078086376, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 1.165704750760885, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"logits/chosen": -2.2313215732574463, |
|
"logits/rejected": -2.114736795425415, |
|
"logps/chosen": -277.5883483886719, |
|
"logps/rejected": -255.2056427001953, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.026263630017638206, |
|
"rewards/margins": 0.005699009168893099, |
|
"rewards/rejected": 0.020564619451761246, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 1.1872789709669673, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": -2.3138914108276367, |
|
"logits/rejected": -2.2109274864196777, |
|
"logps/chosen": -281.3846740722656, |
|
"logps/rejected": -262.41693115234375, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.04216086491942406, |
|
"rewards/margins": 0.014992751181125641, |
|
"rewards/rejected": 0.027168119326233864, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 1.1753743538686479, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": -2.307976245880127, |
|
"logits/rejected": -2.2140889167785645, |
|
"logps/chosen": -268.4030456542969, |
|
"logps/rejected": -255.44882202148438, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.05265005677938461, |
|
"rewards/margins": 0.026613134890794754, |
|
"rewards/rejected": 0.026036927476525307, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 1.1656515626034387, |
|
"learning_rate": 2.604166666666667e-06, |
|
"logits/chosen": -2.2787346839904785, |
|
"logits/rejected": -2.175128936767578, |
|
"logps/chosen": -227.7914581298828, |
|
"logps/rejected": -206.5706024169922, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.05774398520588875, |
|
"rewards/margins": 0.042526550590991974, |
|
"rewards/rejected": 0.015217426232993603, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 1.324103463890143, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -2.2864975929260254, |
|
"logits/rejected": -2.185832977294922, |
|
"logps/chosen": -264.6636657714844, |
|
"logps/rejected": -228.9823455810547, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.04733141511678696, |
|
"rewards/margins": 0.05985499545931816, |
|
"rewards/rejected": -0.012523581273853779, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 1.4908224965531012, |
|
"learning_rate": 3.6458333333333333e-06, |
|
"logits/chosen": -2.128446102142334, |
|
"logits/rejected": -2.080828905105591, |
|
"logps/chosen": -256.41363525390625, |
|
"logps/rejected": -262.66949462890625, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.0010238643735647202, |
|
"rewards/margins": 0.10270833969116211, |
|
"rewards/rejected": -0.10373219102621078, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 2.5274509004519445, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -2.2609763145446777, |
|
"logits/rejected": -2.1039209365844727, |
|
"logps/chosen": -263.5312194824219, |
|
"logps/rejected": -256.4105529785156, |
|
"loss": 0.6402, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.12527017295360565, |
|
"rewards/margins": 0.11860889196395874, |
|
"rewards/rejected": -0.2438790500164032, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 2.5638356680875156, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": -2.1489036083221436, |
|
"logits/rejected": -2.081789493560791, |
|
"logps/chosen": -270.69091796875, |
|
"logps/rejected": -291.17962646484375, |
|
"loss": 0.6304, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.1973474770784378, |
|
"rewards/margins": 0.19165988266468048, |
|
"rewards/rejected": -0.3890073299407959, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 2.7331412184768507, |
|
"learning_rate": 4.9997324926814375e-06, |
|
"logits/chosen": -2.193147659301758, |
|
"logits/rejected": -2.0836679935455322, |
|
"logps/chosen": -303.36590576171875, |
|
"logps/rejected": -320.6068420410156, |
|
"loss": 0.6212, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.31098657846450806, |
|
"rewards/margins": 0.2229224145412445, |
|
"rewards/rejected": -0.5339089632034302, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"eval_logits/chosen": -2.0932729244232178, |
|
"eval_logits/rejected": -2.008643627166748, |
|
"eval_logps/chosen": -298.0505676269531, |
|
"eval_logps/rejected": -299.1472473144531, |
|
"eval_loss": 0.6321468353271484, |
|
"eval_rewards/accuracies": 0.6944444179534912, |
|
"eval_rewards/chosen": -0.3312907814979553, |
|
"eval_rewards/margins": 0.213734969496727, |
|
"eval_rewards/rejected": -0.5450257658958435, |
|
"eval_runtime": 321.7711, |
|
"eval_samples_per_second": 6.216, |
|
"eval_steps_per_second": 0.196, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 2.3956891782186904, |
|
"learning_rate": 4.996723692767927e-06, |
|
"logits/chosen": -2.1808857917785645, |
|
"logits/rejected": -2.05126690864563, |
|
"logps/chosen": -281.98193359375, |
|
"logps/rejected": -265.5942687988281, |
|
"loss": 0.6236, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.3388732373714447, |
|
"rewards/margins": 0.2357875108718872, |
|
"rewards/rejected": -0.5746607184410095, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 4.49315717191337, |
|
"learning_rate": 4.9903757462135984e-06, |
|
"logits/chosen": -2.1513657569885254, |
|
"logits/rejected": -2.0812501907348633, |
|
"logps/chosen": -287.4888610839844, |
|
"logps/rejected": -328.67156982421875, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.41489553451538086, |
|
"rewards/margins": 0.31543681025505066, |
|
"rewards/rejected": -0.7303323149681091, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 3.426776723546068, |
|
"learning_rate": 4.980697142834315e-06, |
|
"logits/chosen": -2.1264045238494873, |
|
"logits/rejected": -2.0155797004699707, |
|
"logps/chosen": -381.717041015625, |
|
"logps/rejected": -350.6758728027344, |
|
"loss": 0.5946, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.8191909790039062, |
|
"rewards/margins": 0.3090600371360779, |
|
"rewards/rejected": -1.1282509565353394, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 3.004217560772131, |
|
"learning_rate": 4.967700826904229e-06, |
|
"logits/chosen": -2.0693180561065674, |
|
"logits/rejected": -2.010124683380127, |
|
"logps/chosen": -363.1732482910156, |
|
"logps/rejected": -400.6826171875, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9419809579849243, |
|
"rewards/margins": 0.41463392972946167, |
|
"rewards/rejected": -1.3566150665283203, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 4.5725215394059004, |
|
"learning_rate": 4.951404179843963e-06, |
|
"logits/chosen": -2.154370069503784, |
|
"logits/rejected": -2.016098976135254, |
|
"logps/chosen": -362.11627197265625, |
|
"logps/rejected": -358.12823486328125, |
|
"loss": 0.5888, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.7362005710601807, |
|
"rewards/margins": 0.44253450632095337, |
|
"rewards/rejected": -1.1787351369857788, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 7.788576778528557, |
|
"learning_rate": 4.931828996974498e-06, |
|
"logits/chosen": -2.099804162979126, |
|
"logits/rejected": -1.9289453029632568, |
|
"logps/chosen": -348.2688903808594, |
|
"logps/rejected": -349.8648986816406, |
|
"loss": 0.5524, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.6180304288864136, |
|
"rewards/margins": 0.5966934561729431, |
|
"rewards/rejected": -1.2147239446640015, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 3.73369227909531, |
|
"learning_rate": 4.909001458367867e-06, |
|
"logits/chosen": -2.005589008331299, |
|
"logits/rejected": -1.8591816425323486, |
|
"logps/chosen": -357.9122619628906, |
|
"logps/rejected": -401.5499267578125, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9827691912651062, |
|
"rewards/margins": 0.5925935506820679, |
|
"rewards/rejected": -1.5753626823425293, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 5.142786695761387, |
|
"learning_rate": 4.882952093833628e-06, |
|
"logits/chosen": -1.906904935836792, |
|
"logits/rejected": -1.8502197265625, |
|
"logps/chosen": -384.1532287597656, |
|
"logps/rejected": -441.2289123535156, |
|
"loss": 0.5357, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2132524251937866, |
|
"rewards/margins": 0.6486458778381348, |
|
"rewards/rejected": -1.861898422241211, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 4.970082596077688, |
|
"learning_rate": 4.853715742087947e-06, |
|
"logits/chosen": -1.7953016757965088, |
|
"logits/rejected": -1.749053716659546, |
|
"logps/chosen": -331.3048400878906, |
|
"logps/rejected": -420.5511779785156, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.8520873188972473, |
|
"rewards/margins": 0.6949248313903809, |
|
"rewards/rejected": -1.5470120906829834, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 4.380709251136357, |
|
"learning_rate": 4.821331504159906e-06, |
|
"logits/chosen": -1.931880235671997, |
|
"logits/rejected": -1.7673060894012451, |
|
"logps/chosen": -395.6458435058594, |
|
"logps/rejected": -393.07818603515625, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8311824798583984, |
|
"rewards/margins": 0.598331868648529, |
|
"rewards/rejected": -1.4295144081115723, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": -1.7550567388534546, |
|
"eval_logits/rejected": -1.669370174407959, |
|
"eval_logps/chosen": -346.90643310546875, |
|
"eval_logps/rejected": -381.2445983886719, |
|
"eval_loss": 0.5600804686546326, |
|
"eval_rewards/accuracies": 0.7222222089767456, |
|
"eval_rewards/chosen": -0.8198498487472534, |
|
"eval_rewards/margins": 0.5461496114730835, |
|
"eval_rewards/rejected": -1.365999460220337, |
|
"eval_runtime": 319.6036, |
|
"eval_samples_per_second": 6.258, |
|
"eval_steps_per_second": 0.197, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 4.390548860079351, |
|
"learning_rate": 4.7858426910973435e-06, |
|
"logits/chosen": -1.82965886592865, |
|
"logits/rejected": -1.7543909549713135, |
|
"logps/chosen": -382.2540588378906, |
|
"logps/rejected": -415.6708984375, |
|
"loss": 0.5746, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.0466556549072266, |
|
"rewards/margins": 0.42583417892456055, |
|
"rewards/rejected": -1.4724897146224976, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 5.154433364870237, |
|
"learning_rate": 4.747296766042161e-06, |
|
"logits/chosen": -1.791577696800232, |
|
"logits/rejected": -1.676790475845337, |
|
"logps/chosen": -430.439697265625, |
|
"logps/rejected": -446.4627380371094, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.3720312118530273, |
|
"rewards/margins": 0.6089810132980347, |
|
"rewards/rejected": -1.9810121059417725, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 5.067287557540918, |
|
"learning_rate": 4.705745280752586e-06, |
|
"logits/chosen": -1.6005672216415405, |
|
"logits/rejected": -1.5293024778366089, |
|
"logps/chosen": -364.32769775390625, |
|
"logps/rejected": -393.8982849121094, |
|
"loss": 0.5585, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.1689074039459229, |
|
"rewards/margins": 0.4595082402229309, |
|
"rewards/rejected": -1.6284157037734985, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 4.187057753178847, |
|
"learning_rate": 4.661243806657256e-06, |
|
"logits/chosen": -1.6649366617202759, |
|
"logits/rejected": -1.6055676937103271, |
|
"logps/chosen": -377.30517578125, |
|
"logps/rejected": -407.87017822265625, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.2085318565368652, |
|
"rewards/margins": 0.6119499802589417, |
|
"rewards/rejected": -1.8204820156097412, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 4.31385003289797, |
|
"learning_rate": 4.613851860533367e-06, |
|
"logits/chosen": -1.6592628955841064, |
|
"logits/rejected": -1.5831575393676758, |
|
"logps/chosen": -392.666259765625, |
|
"logps/rejected": -415.00933837890625, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.2613078355789185, |
|
"rewards/margins": 0.6352987289428711, |
|
"rewards/rejected": -1.896606683731079, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 6.08274430115293, |
|
"learning_rate": 4.563632824908252e-06, |
|
"logits/chosen": -1.5671354532241821, |
|
"logits/rejected": -1.4325566291809082, |
|
"logps/chosen": -446.5708923339844, |
|
"logps/rejected": -492.8990173339844, |
|
"loss": 0.5228, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.7604526281356812, |
|
"rewards/margins": 0.6336302161216736, |
|
"rewards/rejected": -2.394083261489868, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 4.56724778147284, |
|
"learning_rate": 4.510653863290871e-06, |
|
"logits/chosen": -1.456993579864502, |
|
"logits/rejected": -1.3645284175872803, |
|
"logps/chosen": -401.00244140625, |
|
"logps/rejected": -461.67816162109375, |
|
"loss": 0.5277, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.2706773281097412, |
|
"rewards/margins": 0.8170326352119446, |
|
"rewards/rejected": -2.087709903717041, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 3.947817632488332, |
|
"learning_rate": 4.454985830346574e-06, |
|
"logits/chosen": -1.5088273286819458, |
|
"logits/rejected": -1.3900407552719116, |
|
"logps/chosen": -385.47772216796875, |
|
"logps/rejected": -426.82049560546875, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9744614362716675, |
|
"rewards/margins": 0.75079345703125, |
|
"rewards/rejected": -1.725255012512207, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 4.730568661724723, |
|
"learning_rate": 4.396703177135262e-06, |
|
"logits/chosen": -1.517740249633789, |
|
"logits/rejected": -1.351360559463501, |
|
"logps/chosen": -388.90789794921875, |
|
"logps/rejected": -416.22052001953125, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.0588579177856445, |
|
"rewards/margins": 0.6205722093582153, |
|
"rewards/rejected": -1.6794300079345703, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 5.829501633867066, |
|
"learning_rate": 4.335883851539693e-06, |
|
"logits/chosen": -1.2372510433197021, |
|
"logits/rejected": -1.2240632772445679, |
|
"logps/chosen": -386.8778381347656, |
|
"logps/rejected": -448.6904296875, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.441900610923767, |
|
"rewards/margins": 0.6276523470878601, |
|
"rewards/rejected": -2.0695528984069824, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"eval_logits/chosen": -1.1714633703231812, |
|
"eval_logits/rejected": -1.0704221725463867, |
|
"eval_logps/chosen": -417.12750244140625, |
|
"eval_logps/rejected": -478.07476806640625, |
|
"eval_loss": 0.5264545679092407, |
|
"eval_rewards/accuracies": 0.7460317611694336, |
|
"eval_rewards/chosen": -1.5220601558685303, |
|
"eval_rewards/margins": 0.8122406601905823, |
|
"eval_rewards/rejected": -2.334300994873047, |
|
"eval_runtime": 318.2186, |
|
"eval_samples_per_second": 6.285, |
|
"eval_steps_per_second": 0.198, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 4.0412766328088585, |
|
"learning_rate": 4.2726091940171055e-06, |
|
"logits/chosen": -1.3419673442840576, |
|
"logits/rejected": -1.1779625415802002, |
|
"logps/chosen": -421.1036682128906, |
|
"logps/rejected": -466.26275634765625, |
|
"loss": 0.5369, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.4869836568832397, |
|
"rewards/margins": 0.8415184020996094, |
|
"rewards/rejected": -2.3285021781921387, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 7.228642390991299, |
|
"learning_rate": 4.206963828813555e-06, |
|
"logits/chosen": -1.3606576919555664, |
|
"logits/rejected": -1.266966700553894, |
|
"logps/chosen": -374.7814025878906, |
|
"logps/rejected": -450.57489013671875, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.3771132230758667, |
|
"rewards/margins": 0.7092502117156982, |
|
"rewards/rejected": -2.0863633155822754, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 5.998674043872773, |
|
"learning_rate": 4.139035550786495e-06, |
|
"logits/chosen": -1.305854082107544, |
|
"logits/rejected": -1.2263256311416626, |
|
"logps/chosen": -429.99188232421875, |
|
"logps/rejected": -497.3521423339844, |
|
"loss": 0.5206, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8139232397079468, |
|
"rewards/margins": 0.7226920127868652, |
|
"rewards/rejected": -2.5366153717041016, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 6.244527338235992, |
|
"learning_rate": 4.068915207986931e-06, |
|
"logits/chosen": -1.0698177814483643, |
|
"logits/rejected": -0.9774864315986633, |
|
"logps/chosen": -438.493408203125, |
|
"logps/rejected": -487.9413146972656, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8784738779067993, |
|
"rewards/margins": 0.7664145231246948, |
|
"rewards/rejected": -2.644888401031494, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 6.05372732646654, |
|
"learning_rate": 3.996696580158211e-06, |
|
"logits/chosen": -1.419528603553772, |
|
"logits/rejected": -1.3026950359344482, |
|
"logps/chosen": -399.3412170410156, |
|
"logps/rejected": -459.59063720703125, |
|
"loss": 0.5144, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.4473296403884888, |
|
"rewards/margins": 0.7528419494628906, |
|
"rewards/rejected": -2.2001712322235107, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 7.477190042107064, |
|
"learning_rate": 3.922476253313921e-06, |
|
"logits/chosen": -1.3192278146743774, |
|
"logits/rejected": -1.2596288919448853, |
|
"logps/chosen": -388.9505920410156, |
|
"logps/rejected": -450.23748779296875, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.3493094444274902, |
|
"rewards/margins": 0.7934570908546448, |
|
"rewards/rejected": -2.1427664756774902, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 5.411077344670459, |
|
"learning_rate": 3.846353490562664e-06, |
|
"logits/chosen": -1.3181861639022827, |
|
"logits/rejected": -1.2644492387771606, |
|
"logps/chosen": -406.28851318359375, |
|
"logps/rejected": -484.81646728515625, |
|
"loss": 0.5189, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.618009328842163, |
|
"rewards/margins": 0.912137508392334, |
|
"rewards/rejected": -2.530146837234497, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 6.255769900471538, |
|
"learning_rate": 3.768430099352445e-06, |
|
"logits/chosen": -1.2576202154159546, |
|
"logits/rejected": -1.1985712051391602, |
|
"logps/chosen": -426.02978515625, |
|
"logps/rejected": -499.4483337402344, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.724285364151001, |
|
"rewards/margins": 0.7168464660644531, |
|
"rewards/rejected": -2.441131830215454, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 5.374401251386735, |
|
"learning_rate": 3.6888102953122307e-06, |
|
"logits/chosen": -1.3139002323150635, |
|
"logits/rejected": -1.2482521533966064, |
|
"logps/chosen": -366.7402648925781, |
|
"logps/rejected": -402.67144775390625, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0828481912612915, |
|
"rewards/margins": 0.5530051589012146, |
|
"rewards/rejected": -1.6358531713485718, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 5.700868192112112, |
|
"learning_rate": 3.607600562872785e-06, |
|
"logits/chosen": -1.2806731462478638, |
|
"logits/rejected": -1.2076570987701416, |
|
"logps/chosen": -397.6805725097656, |
|
"logps/rejected": -457.99578857421875, |
|
"loss": 0.5261, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.534794807434082, |
|
"rewards/margins": 0.6010990738868713, |
|
"rewards/rejected": -2.1358938217163086, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": -1.201329231262207, |
|
"eval_logits/rejected": -1.1013988256454468, |
|
"eval_logps/chosen": -430.45263671875, |
|
"eval_logps/rejected": -497.2759094238281, |
|
"eval_loss": 0.5082111954689026, |
|
"eval_rewards/accuracies": 0.7539682388305664, |
|
"eval_rewards/chosen": -1.6553115844726562, |
|
"eval_rewards/margins": 0.8710008859634399, |
|
"eval_rewards/rejected": -2.5263123512268066, |
|
"eval_runtime": 314.7811, |
|
"eval_samples_per_second": 6.354, |
|
"eval_steps_per_second": 0.2, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 4.78368697455832, |
|
"learning_rate": 3.5249095128531863e-06, |
|
"logits/chosen": -1.1789991855621338, |
|
"logits/rejected": -1.0596911907196045, |
|
"logps/chosen": -449.33709716796875, |
|
"logps/rejected": -515.829833984375, |
|
"loss": 0.5275, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.8276984691619873, |
|
"rewards/margins": 0.9437017440795898, |
|
"rewards/rejected": -2.7714004516601562, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 5.718886468906387, |
|
"learning_rate": 3.4408477372034743e-06, |
|
"logits/chosen": -1.3089560270309448, |
|
"logits/rejected": -1.111132025718689, |
|
"logps/chosen": -451.94354248046875, |
|
"logps/rejected": -472.0818786621094, |
|
"loss": 0.5212, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.6895692348480225, |
|
"rewards/margins": 0.77433842420578, |
|
"rewards/rejected": -2.4639077186584473, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 5.4739752222845, |
|
"learning_rate": 3.355527661097728e-06, |
|
"logits/chosen": -1.2457327842712402, |
|
"logits/rejected": -1.1701027154922485, |
|
"logps/chosen": -408.38629150390625, |
|
"logps/rejected": -495.34906005859375, |
|
"loss": 0.5228, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8043445348739624, |
|
"rewards/margins": 0.8327828645706177, |
|
"rewards/rejected": -2.63712739944458, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 6.85581768096026, |
|
"learning_rate": 3.269063392575352e-06, |
|
"logits/chosen": -1.2453696727752686, |
|
"logits/rejected": -1.202413558959961, |
|
"logps/chosen": -399.9681091308594, |
|
"logps/rejected": -447.87744140625, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.5209221839904785, |
|
"rewards/margins": 0.6652868986129761, |
|
"rewards/rejected": -2.186208963394165, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 5.776991001419176, |
|
"learning_rate": 3.181570569931697e-06, |
|
"logits/chosen": -1.3612130880355835, |
|
"logits/rejected": -1.2961633205413818, |
|
"logps/chosen": -399.5633239746094, |
|
"logps/rejected": -510.95513916015625, |
|
"loss": 0.5105, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.5496407747268677, |
|
"rewards/margins": 0.9577849507331848, |
|
"rewards/rejected": -2.507425308227539, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 5.051312773178446, |
|
"learning_rate": 3.09316620706208e-06, |
|
"logits/chosen": -1.2932283878326416, |
|
"logits/rejected": -1.2328197956085205, |
|
"logps/chosen": -472.6172790527344, |
|
"logps/rejected": -538.5615234375, |
|
"loss": 0.5286, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.122169256210327, |
|
"rewards/margins": 0.7884066104888916, |
|
"rewards/rejected": -2.9105758666992188, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 4.905312405134056, |
|
"learning_rate": 3.0039685369660785e-06, |
|
"logits/chosen": -1.299459457397461, |
|
"logits/rejected": -1.164813756942749, |
|
"logps/chosen": -433.37554931640625, |
|
"logps/rejected": -470.384521484375, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7430375814437866, |
|
"rewards/margins": 0.6865721940994263, |
|
"rewards/rejected": -2.429609775543213, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 6.146340380005326, |
|
"learning_rate": 2.91409685362137e-06, |
|
"logits/chosen": -1.2403475046157837, |
|
"logits/rejected": -1.0839545726776123, |
|
"logps/chosen": -513.665771484375, |
|
"logps/rejected": -583.8973388671875, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.4166836738586426, |
|
"rewards/margins": 1.000232219696045, |
|
"rewards/rejected": -3.4169158935546875, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 5.112445065707855, |
|
"learning_rate": 2.8236713524386085e-06, |
|
"logits/chosen": -1.2714743614196777, |
|
"logits/rejected": -1.0941110849380493, |
|
"logps/chosen": -532.9962158203125, |
|
"logps/rejected": -588.236328125, |
|
"loss": 0.5025, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.491058826446533, |
|
"rewards/margins": 0.900057315826416, |
|
"rewards/rejected": -3.39111590385437, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 5.578978194196055, |
|
"learning_rate": 2.7328129695107205e-06, |
|
"logits/chosen": -1.2111141681671143, |
|
"logits/rejected": -1.1049137115478516, |
|
"logps/chosen": -476.11932373046875, |
|
"logps/rejected": -544.0899658203125, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.1048712730407715, |
|
"rewards/margins": 0.9492176175117493, |
|
"rewards/rejected": -3.054089069366455, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"eval_logits/chosen": -1.0955979824066162, |
|
"eval_logits/rejected": -0.9851866364479065, |
|
"eval_logps/chosen": -509.9847717285156, |
|
"eval_logps/rejected": -587.1475830078125, |
|
"eval_loss": 0.5058528184890747, |
|
"eval_rewards/accuracies": 0.75, |
|
"eval_rewards/chosen": -2.4506328105926514, |
|
"eval_rewards/margins": 0.9743964076042175, |
|
"eval_rewards/rejected": -3.4250295162200928, |
|
"eval_runtime": 306.1826, |
|
"eval_samples_per_second": 6.532, |
|
"eval_steps_per_second": 0.206, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 5.171650655838442, |
|
"learning_rate": 2.641643219871597e-06, |
|
"logits/chosen": -1.1813862323760986, |
|
"logits/rejected": -1.0965768098831177, |
|
"logps/chosen": -508.13092041015625, |
|
"logps/rejected": -570.53759765625, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.4878056049346924, |
|
"rewards/margins": 0.8628519773483276, |
|
"rewards/rejected": -3.3506579399108887, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 6.785478355965952, |
|
"learning_rate": 2.5502840349805074e-06, |
|
"logits/chosen": -1.1492969989776611, |
|
"logits/rejected": -1.0285675525665283, |
|
"logps/chosen": -577.2451171875, |
|
"logps/rejected": -618.67236328125, |
|
"loss": 0.536, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -3.018739938735962, |
|
"rewards/margins": 0.8701656460762024, |
|
"rewards/rejected": -3.8889052867889404, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 5.9061132192029975, |
|
"learning_rate": 2.4588575996495797e-06, |
|
"logits/chosen": -1.1598259210586548, |
|
"logits/rejected": -1.0490505695343018, |
|
"logps/chosen": -515.8734130859375, |
|
"logps/rejected": -585.2592163085938, |
|
"loss": 0.5089, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.4302353858947754, |
|
"rewards/margins": 0.9659484624862671, |
|
"rewards/rejected": -3.396183729171753, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 4.6033180245939604, |
|
"learning_rate": 2.367486188632446e-06, |
|
"logits/chosen": -1.1840062141418457, |
|
"logits/rejected": -1.05405592918396, |
|
"logps/chosen": -469.9341735839844, |
|
"logps/rejected": -503.66156005859375, |
|
"loss": 0.5311, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.1539134979248047, |
|
"rewards/margins": 0.7668038606643677, |
|
"rewards/rejected": -2.920717239379883, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 4.876993529338895, |
|
"learning_rate": 2.276292003092593e-06, |
|
"logits/chosen": -1.1908996105194092, |
|
"logits/rejected": -1.03009033203125, |
|
"logps/chosen": -489.2470703125, |
|
"logps/rejected": -519.9906616210938, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.272523880004883, |
|
"rewards/margins": 0.6718829870223999, |
|
"rewards/rejected": -2.9444069862365723, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 6.4140589895092734, |
|
"learning_rate": 2.1853970071701415e-06, |
|
"logits/chosen": -1.0713722705841064, |
|
"logits/rejected": -0.9895181655883789, |
|
"logps/chosen": -517.431396484375, |
|
"logps/rejected": -573.0247802734375, |
|
"loss": 0.527, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.533311605453491, |
|
"rewards/margins": 0.8953849077224731, |
|
"rewards/rejected": -3.428696393966675, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 6.376011574902463, |
|
"learning_rate": 2.0949227648656194e-06, |
|
"logits/chosen": -1.129098653793335, |
|
"logits/rejected": -1.0245158672332764, |
|
"logps/chosen": -559.3487548828125, |
|
"logps/rejected": -636.18310546875, |
|
"loss": 0.5053, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.986375331878662, |
|
"rewards/margins": 0.8951998949050903, |
|
"rewards/rejected": -3.8815758228302, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 5.245103814265102, |
|
"learning_rate": 2.00499027745888e-06, |
|
"logits/chosen": -1.081526517868042, |
|
"logits/rejected": -0.9703742861747742, |
|
"logps/chosen": -521.400146484375, |
|
"logps/rejected": -581.4735107421875, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.8668112754821777, |
|
"rewards/margins": 0.8452316522598267, |
|
"rewards/rejected": -3.712043046951294, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 5.7527775937649, |
|
"learning_rate": 1.915719821680624e-06, |
|
"logits/chosen": -1.287595510482788, |
|
"logits/rejected": -1.2315866947174072, |
|
"logps/chosen": -470.63995361328125, |
|
"logps/rejected": -567.0520629882812, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.246957778930664, |
|
"rewards/margins": 0.9269870519638062, |
|
"rewards/rejected": -3.1739444732666016, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 4.552980884850473, |
|
"learning_rate": 1.8272307888529276e-06, |
|
"logits/chosen": -1.1632342338562012, |
|
"logits/rejected": -1.0386791229248047, |
|
"logps/chosen": -524.4891357421875, |
|
"logps/rejected": -568.3989868164062, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.2760519981384277, |
|
"rewards/margins": 0.7800448536872864, |
|
"rewards/rejected": -3.0560970306396484, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": -1.107803463935852, |
|
"eval_logits/rejected": -0.9969872832298279, |
|
"eval_logps/chosen": -492.1783447265625, |
|
"eval_logps/rejected": -567.8048706054688, |
|
"eval_loss": 0.5023476481437683, |
|
"eval_rewards/accuracies": 0.7678571343421936, |
|
"eval_rewards/chosen": -2.272569179534912, |
|
"eval_rewards/margins": 0.9590328931808472, |
|
"eval_rewards/rejected": -3.231602191925049, |
|
"eval_runtime": 309.4155, |
|
"eval_samples_per_second": 6.464, |
|
"eval_steps_per_second": 0.204, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 5.93434046945835, |
|
"learning_rate": 1.739641525213929e-06, |
|
"logits/chosen": -1.1899484395980835, |
|
"logits/rejected": -1.128395438194275, |
|
"logps/chosen": -505.1640625, |
|
"logps/rejected": -569.9411010742188, |
|
"loss": 0.5131, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.3301897048950195, |
|
"rewards/margins": 1.009691834449768, |
|
"rewards/rejected": -3.339881420135498, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 6.7592991694860345, |
|
"learning_rate": 1.6530691736402317e-06, |
|
"logits/chosen": -1.3041086196899414, |
|
"logits/rejected": -1.1299916505813599, |
|
"logps/chosen": -520.1345825195312, |
|
"logps/rejected": -570.2188110351562, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.1966605186462402, |
|
"rewards/margins": 1.0572277307510376, |
|
"rewards/rejected": -3.253887891769409, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 6.939788728853941, |
|
"learning_rate": 1.5676295169786864e-06, |
|
"logits/chosen": -1.2350585460662842, |
|
"logits/rejected": -1.1085925102233887, |
|
"logps/chosen": -506.601806640625, |
|
"logps/rejected": -558.4306640625, |
|
"loss": 0.4883, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.2913689613342285, |
|
"rewards/margins": 0.9455882906913757, |
|
"rewards/rejected": -3.23695707321167, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 6.885905876648702, |
|
"learning_rate": 1.4834368231970922e-06, |
|
"logits/chosen": -1.1120684146881104, |
|
"logits/rejected": -0.9865466952323914, |
|
"logps/chosen": -484.1168518066406, |
|
"logps/rejected": -577.2811279296875, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.489487648010254, |
|
"rewards/margins": 1.0948190689086914, |
|
"rewards/rejected": -3.5843067169189453, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 5.355709320338061, |
|
"learning_rate": 1.4006036925609245e-06, |
|
"logits/chosen": -1.2093435525894165, |
|
"logits/rejected": -1.1428296566009521, |
|
"logps/chosen": -484.78680419921875, |
|
"logps/rejected": -564.5701293945312, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2595038414001465, |
|
"rewards/margins": 0.8043657541275024, |
|
"rewards/rejected": -3.0638692378997803, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 5.122013631042651, |
|
"learning_rate": 1.3192409070404582e-06, |
|
"logits/chosen": -1.2926921844482422, |
|
"logits/rejected": -1.1912837028503418, |
|
"logps/chosen": -462.49700927734375, |
|
"logps/rejected": -535.156005859375, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.105215311050415, |
|
"rewards/margins": 0.9164140820503235, |
|
"rewards/rejected": -3.0216293334960938, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 7.049199430649973, |
|
"learning_rate": 1.2394572821496953e-06, |
|
"logits/chosen": -1.144698977470398, |
|
"logits/rejected": -0.9912746548652649, |
|
"logps/chosen": -518.4602661132812, |
|
"logps/rejected": -603.5948486328125, |
|
"loss": 0.4773, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.6358141899108887, |
|
"rewards/margins": 1.113600492477417, |
|
"rewards/rejected": -3.7494144439697266, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 7.264843779332727, |
|
"learning_rate": 1.1613595214152713e-06, |
|
"logits/chosen": -1.1309657096862793, |
|
"logits/rejected": -1.0589998960494995, |
|
"logps/chosen": -539.7699584960938, |
|
"logps/rejected": -620.6878662109375, |
|
"loss": 0.5266, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.733842134475708, |
|
"rewards/margins": 0.9637987017631531, |
|
"rewards/rejected": -3.6976406574249268, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 5.030210764921467, |
|
"learning_rate": 1.0850520736699362e-06, |
|
"logits/chosen": -1.288641333580017, |
|
"logits/rejected": -1.134615182876587, |
|
"logps/chosen": -510.186767578125, |
|
"logps/rejected": -550.937744140625, |
|
"loss": 0.4574, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3800137042999268, |
|
"rewards/margins": 0.9033814668655396, |
|
"rewards/rejected": -3.2833950519561768, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 5.9328337595528495, |
|
"learning_rate": 1.0106369933615043e-06, |
|
"logits/chosen": -1.1679879426956177, |
|
"logits/rejected": -1.1002038717269897, |
|
"logps/chosen": -487.10821533203125, |
|
"logps/rejected": -576.1431884765625, |
|
"loss": 0.4681, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3741798400878906, |
|
"rewards/margins": 0.9901224970817566, |
|
"rewards/rejected": -3.364302158355713, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"eval_logits/chosen": -1.119031548500061, |
|
"eval_logits/rejected": -1.006774663925171, |
|
"eval_logps/chosen": -496.6231994628906, |
|
"eval_logps/rejected": -581.5197143554688, |
|
"eval_loss": 0.49932044744491577, |
|
"eval_rewards/accuracies": 0.7678571343421936, |
|
"eval_rewards/chosen": -2.3170175552368164, |
|
"eval_rewards/margins": 1.0517328977584839, |
|
"eval_rewards/rejected": -3.3687500953674316, |
|
"eval_runtime": 280.6333, |
|
"eval_samples_per_second": 7.127, |
|
"eval_steps_per_second": 0.224, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 5.3428016079168215, |
|
"learning_rate": 9.382138040640714e-07, |
|
"logits/chosen": -1.1798994541168213, |
|
"logits/rejected": -1.021723985671997, |
|
"logps/chosen": -505.2616271972656, |
|
"logps/rejected": -571.1856689453125, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.396669864654541, |
|
"rewards/margins": 0.9654728174209595, |
|
"rewards/rejected": -3.362142562866211, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 5.0438516064442505, |
|
"learning_rate": 8.678793653740633e-07, |
|
"logits/chosen": -1.3297260999679565, |
|
"logits/rejected": -1.1736423969268799, |
|
"logps/chosen": -527.0916748046875, |
|
"logps/rejected": -579.3074340820312, |
|
"loss": 0.478, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.070934534072876, |
|
"rewards/margins": 1.0328184366226196, |
|
"rewards/rejected": -3.103752851486206, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 5.464567536353577, |
|
"learning_rate": 7.997277433690984e-07, |
|
"logits/chosen": -1.2094228267669678, |
|
"logits/rejected": -1.076755404472351, |
|
"logps/chosen": -460.71563720703125, |
|
"logps/rejected": -538.8247680664062, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.2227933406829834, |
|
"rewards/margins": 0.9530885815620422, |
|
"rewards/rejected": -3.175881862640381, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 5.377248875033102, |
|
"learning_rate": 7.338500848029603e-07, |
|
"logits/chosen": -1.1969387531280518, |
|
"logits/rejected": -1.0555990934371948, |
|
"logps/chosen": -529.8873291015625, |
|
"logps/rejected": -568.1295166015625, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.4915201663970947, |
|
"rewards/margins": 0.9319826364517212, |
|
"rewards/rejected": -3.4235024452209473, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 5.342695362281337, |
|
"learning_rate": 6.70334495204884e-07, |
|
"logits/chosen": -1.0425455570220947, |
|
"logits/rejected": -0.9723536372184753, |
|
"logps/chosen": -487.095947265625, |
|
"logps/rejected": -599.2386474609375, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.589409112930298, |
|
"rewards/margins": 1.040583848953247, |
|
"rewards/rejected": -3.629992723464966, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 4.96165517698307, |
|
"learning_rate": 6.092659210462232e-07, |
|
"logits/chosen": -1.172639012336731, |
|
"logits/rejected": -1.0221275091171265, |
|
"logps/chosen": -510.41253662109375, |
|
"logps/rejected": -564.8956909179688, |
|
"loss": 0.5034, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.53062105178833, |
|
"rewards/margins": 0.8949772119522095, |
|
"rewards/rejected": -3.42559814453125, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 4.707914305263311, |
|
"learning_rate": 5.507260361320738e-07, |
|
"logits/chosen": -1.2693157196044922, |
|
"logits/rejected": -1.2365710735321045, |
|
"logps/chosen": -522.3062744140625, |
|
"logps/rejected": -619.4640502929688, |
|
"loss": 0.4736, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.363852024078369, |
|
"rewards/margins": 0.9652940034866333, |
|
"rewards/rejected": -3.329145908355713, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 5.592342234404946, |
|
"learning_rate": 4.947931323697983e-07, |
|
"logits/chosen": -1.2886607646942139, |
|
"logits/rejected": -1.0684127807617188, |
|
"logps/chosen": -534.323486328125, |
|
"logps/rejected": -556.2030639648438, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.277695894241333, |
|
"rewards/margins": 0.8380621075630188, |
|
"rewards/rejected": -3.115757942199707, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 6.076661367759154, |
|
"learning_rate": 4.4154201506053985e-07, |
|
"logits/chosen": -1.1675662994384766, |
|
"logits/rejected": -1.065953254699707, |
|
"logps/chosen": -489.9781188964844, |
|
"logps/rejected": -578.2179565429688, |
|
"loss": 0.5128, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.482016086578369, |
|
"rewards/margins": 0.9305012822151184, |
|
"rewards/rejected": -3.4125168323516846, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 6.131830839970953, |
|
"learning_rate": 3.910439028537638e-07, |
|
"logits/chosen": -1.1610690355300903, |
|
"logits/rejected": -1.1072094440460205, |
|
"logps/chosen": -469.524658203125, |
|
"logps/rejected": -585.119140625, |
|
"loss": 0.4852, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.3968334197998047, |
|
"rewards/margins": 1.120810866355896, |
|
"rewards/rejected": -3.5176444053649902, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": -1.1353023052215576, |
|
"eval_logits/rejected": -1.0236940383911133, |
|
"eval_logps/chosen": -504.6183166503906, |
|
"eval_logps/rejected": -585.8155517578125, |
|
"eval_loss": 0.49497368931770325, |
|
"eval_rewards/accuracies": 0.773809552192688, |
|
"eval_rewards/chosen": -2.396967887878418, |
|
"eval_rewards/margins": 1.0147408246994019, |
|
"eval_rewards/rejected": -3.4117088317871094, |
|
"eval_runtime": 274.6399, |
|
"eval_samples_per_second": 7.282, |
|
"eval_steps_per_second": 0.229, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 4.892138077626307, |
|
"learning_rate": 3.4336633249862084e-07, |
|
"logits/chosen": -1.209530234336853, |
|
"logits/rejected": -1.0349524021148682, |
|
"logps/chosen": -532.2420043945312, |
|
"logps/rejected": -591.3436279296875, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.4228711128234863, |
|
"rewards/margins": 0.9818238019943237, |
|
"rewards/rejected": -3.4046947956085205, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 6.133736149907814, |
|
"learning_rate": 2.98573068519539e-07, |
|
"logits/chosen": -1.2474277019500732, |
|
"logits/rejected": -1.1728675365447998, |
|
"logps/chosen": -516.3637084960938, |
|
"logps/rejected": -589.2227783203125, |
|
"loss": 0.5066, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5452017784118652, |
|
"rewards/margins": 0.8416748046875, |
|
"rewards/rejected": -3.3868765830993652, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 4.448941268784857, |
|
"learning_rate": 2.5672401793681854e-07, |
|
"logits/chosen": -1.1796165704727173, |
|
"logits/rejected": -1.1301778554916382, |
|
"logps/chosen": -493.81610107421875, |
|
"logps/rejected": -576.2391357421875, |
|
"loss": 0.5119, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.471508502960205, |
|
"rewards/margins": 0.8166677355766296, |
|
"rewards/rejected": -3.2881767749786377, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 6.561312283813159, |
|
"learning_rate": 2.178751501463036e-07, |
|
"logits/chosen": -1.2200143337249756, |
|
"logits/rejected": -1.120086431503296, |
|
"logps/chosen": -525.2439575195312, |
|
"logps/rejected": -629.3662719726562, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.501779317855835, |
|
"rewards/margins": 1.0803827047348022, |
|
"rewards/rejected": -3.5821621417999268, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 5.675008200582371, |
|
"learning_rate": 1.820784220652766e-07, |
|
"logits/chosen": -1.148503065109253, |
|
"logits/rejected": -0.9931659698486328, |
|
"logps/chosen": -511.488037109375, |
|
"logps/rejected": -576.7986450195312, |
|
"loss": 0.4672, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.456411361694336, |
|
"rewards/margins": 0.9856443405151367, |
|
"rewards/rejected": -3.4420554637908936, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 5.847430828911881, |
|
"learning_rate": 1.4938170864468636e-07, |
|
"logits/chosen": -1.2107369899749756, |
|
"logits/rejected": -1.0609266757965088, |
|
"logps/chosen": -531.3970947265625, |
|
"logps/rejected": -590.2992553710938, |
|
"loss": 0.4693, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.60589861869812, |
|
"rewards/margins": 0.8416641354560852, |
|
"rewards/rejected": -3.4475624561309814, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 5.678184676582534, |
|
"learning_rate": 1.1982873884064466e-07, |
|
"logits/chosen": -1.24862539768219, |
|
"logits/rejected": -1.0584386587142944, |
|
"logps/chosen": -544.3020629882812, |
|
"logps/rejected": -598.3751220703125, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.632056713104248, |
|
"rewards/margins": 1.0834977626800537, |
|
"rewards/rejected": -3.7155539989471436, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 5.3183871343635944, |
|
"learning_rate": 9.345903713082305e-08, |
|
"logits/chosen": -1.2423183917999268, |
|
"logits/rejected": -1.1057523488998413, |
|
"logps/chosen": -514.6922607421875, |
|
"logps/rejected": -574.3685302734375, |
|
"loss": 0.4801, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.4431517124176025, |
|
"rewards/margins": 0.9935353994369507, |
|
"rewards/rejected": -3.4366869926452637, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 5.735081148749753, |
|
"learning_rate": 7.030787065396866e-08, |
|
"logits/chosen": -1.1413437128067017, |
|
"logits/rejected": -1.0328372716903687, |
|
"logps/chosen": -508.3773498535156, |
|
"logps/rejected": -619.8019409179688, |
|
"loss": 0.4938, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.5983500480651855, |
|
"rewards/margins": 1.2036244869232178, |
|
"rewards/rejected": -3.8019745349884033, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 6.618746821621782, |
|
"learning_rate": 5.0406202043228604e-08, |
|
"logits/chosen": -1.045906901359558, |
|
"logits/rejected": -0.9974561929702759, |
|
"logps/chosen": -517.8667602539062, |
|
"logps/rejected": -669.0172729492188, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.5165352821350098, |
|
"rewards/margins": 1.2452183961868286, |
|
"rewards/rejected": -3.761753559112549, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"eval_logits/chosen": -1.1023893356323242, |
|
"eval_logits/rejected": -0.9901031255722046, |
|
"eval_logps/chosen": -521.706298828125, |
|
"eval_logps/rejected": -608.1346435546875, |
|
"eval_loss": 0.494513601064682, |
|
"eval_rewards/accuracies": 0.7777777910232544, |
|
"eval_rewards/chosen": -2.567847490310669, |
|
"eval_rewards/margins": 1.0670523643493652, |
|
"eval_rewards/rejected": -3.634899854660034, |
|
"eval_runtime": 302.7434, |
|
"eval_samples_per_second": 6.606, |
|
"eval_steps_per_second": 0.208, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 6.10762031606348, |
|
"learning_rate": 3.378064801637687e-08, |
|
"logits/chosen": -1.1716662645339966, |
|
"logits/rejected": -0.9894771575927734, |
|
"logps/chosen": -494.2669982910156, |
|
"logps/rejected": -552.8900146484375, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.474764585494995, |
|
"rewards/margins": 1.0439238548278809, |
|
"rewards/rejected": -3.518688201904297, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 5.683369015946174, |
|
"learning_rate": 2.0453443778310766e-08, |
|
"logits/chosen": -1.1771481037139893, |
|
"logits/rejected": -1.0182334184646606, |
|
"logps/chosen": -545.4282836914062, |
|
"logps/rejected": -589.4589233398438, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.6610922813415527, |
|
"rewards/margins": 0.9007646441459656, |
|
"rewards/rejected": -3.561856746673584, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 5.950595947719059, |
|
"learning_rate": 1.0442413283435759e-08, |
|
"logits/chosen": -1.1201808452606201, |
|
"logits/rejected": -0.9143557548522949, |
|
"logps/chosen": -556.3572998046875, |
|
"logps/rejected": -596.0765380859375, |
|
"loss": 0.493, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.5759315490722656, |
|
"rewards/margins": 1.0089161396026611, |
|
"rewards/rejected": -3.5848472118377686, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 7.5374296673981425, |
|
"learning_rate": 3.760945397705828e-09, |
|
"logits/chosen": -1.1323813199996948, |
|
"logits/rejected": -1.0745770931243896, |
|
"logps/chosen": -517.08056640625, |
|
"logps/rejected": -605.00146484375, |
|
"loss": 0.4829, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.5434603691101074, |
|
"rewards/margins": 0.8878037333488464, |
|
"rewards/rejected": -3.4312641620635986, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 5.8745190839702515, |
|
"learning_rate": 4.1797599220405605e-10, |
|
"logits/chosen": -1.1730302572250366, |
|
"logits/rejected": -0.9989528656005859, |
|
"logps/chosen": -524.8547973632812, |
|
"logps/rejected": -583.33203125, |
|
"loss": 0.4861, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.5831165313720703, |
|
"rewards/margins": 0.908293604850769, |
|
"rewards/rejected": -3.49141001701355, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5319095570379527, |
|
"train_runtime": 23762.0752, |
|
"train_samples_per_second": 2.573, |
|
"train_steps_per_second": 0.04 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|