|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982631930527722, |
|
"eval_steps": 400, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01068804275217101, |
|
"grad_norm": 84.02989783534706, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -1.0397655963897705, |
|
"logits/rejected": -1.0092562437057495, |
|
"logps/chosen": -0.7628876566886902, |
|
"logps/rejected": -0.7414335012435913, |
|
"loss": 4.8657, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": -7.628875732421875, |
|
"rewards/margins": -0.21454186737537384, |
|
"rewards/rejected": -7.414334774017334, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02137608550434202, |
|
"grad_norm": 65.98135057247741, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -1.0671762228012085, |
|
"logits/rejected": -0.9972389340400696, |
|
"logps/chosen": -0.7587485909461975, |
|
"logps/rejected": -0.7000675201416016, |
|
"loss": 4.7692, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -7.587485313415527, |
|
"rewards/margins": -0.5868101119995117, |
|
"rewards/rejected": -7.000675201416016, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03206412825651302, |
|
"grad_norm": 81.81057773129896, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.9940463900566101, |
|
"logits/rejected": -1.014291524887085, |
|
"logps/chosen": -0.7531558871269226, |
|
"logps/rejected": -0.7980529069900513, |
|
"loss": 5.0125, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -7.531558990478516, |
|
"rewards/margins": 0.44897016882896423, |
|
"rewards/rejected": -7.980528831481934, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04275217100868404, |
|
"grad_norm": 131.3611224894929, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.9941411018371582, |
|
"logits/rejected": -0.9676485061645508, |
|
"logps/chosen": -0.7748786807060242, |
|
"logps/rejected": -0.7950050234794617, |
|
"loss": 4.3887, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -7.748786926269531, |
|
"rewards/margins": 0.20126314461231232, |
|
"rewards/rejected": -7.950050354003906, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.053440213760855046, |
|
"grad_norm": 41.98227986443809, |
|
"learning_rate": 5.319148936170212e-07, |
|
"logits/chosen": -1.06044602394104, |
|
"logits/rejected": -1.0300180912017822, |
|
"logps/chosen": -0.5242100358009338, |
|
"logps/rejected": -0.5184083580970764, |
|
"loss": 4.4531, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -5.242100715637207, |
|
"rewards/margins": -0.05801659822463989, |
|
"rewards/rejected": -5.184083461761475, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06412825651302605, |
|
"grad_norm": 68.84250778408698, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -1.0604535341262817, |
|
"logits/rejected": -1.0147985219955444, |
|
"logps/chosen": -0.5427820086479187, |
|
"logps/rejected": -0.538798451423645, |
|
"loss": 4.0481, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -5.427820205688477, |
|
"rewards/margins": -0.03983556479215622, |
|
"rewards/rejected": -5.3879852294921875, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07481629926519706, |
|
"grad_norm": 55.83166407300918, |
|
"learning_rate": 7.446808510638297e-07, |
|
"logits/chosen": -1.1716325283050537, |
|
"logits/rejected": -1.0896761417388916, |
|
"logps/chosen": -0.45515695214271545, |
|
"logps/rejected": -0.46327242255210876, |
|
"loss": 3.8398, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -4.55156946182251, |
|
"rewards/margins": 0.08115490525960922, |
|
"rewards/rejected": -4.632723808288574, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08550434201736808, |
|
"grad_norm": 47.314667796032154, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -1.1367859840393066, |
|
"logits/rejected": -1.0874762535095215, |
|
"logps/chosen": -0.3599122166633606, |
|
"logps/rejected": -0.4211342930793762, |
|
"loss": 3.8501, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -3.5991222858428955, |
|
"rewards/margins": 0.6122205853462219, |
|
"rewards/rejected": -4.211342811584473, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09619238476953908, |
|
"grad_norm": 43.42431109758399, |
|
"learning_rate": 9.574468085106384e-07, |
|
"logits/chosen": -1.17836332321167, |
|
"logits/rejected": -1.1281588077545166, |
|
"logps/chosen": -0.377611368894577, |
|
"logps/rejected": -0.42245084047317505, |
|
"loss": 3.7936, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -3.7761130332946777, |
|
"rewards/margins": 0.448394775390625, |
|
"rewards/rejected": -4.224508762359619, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10688042752171009, |
|
"grad_norm": 50.23949059041095, |
|
"learning_rate": 9.998741174712533e-07, |
|
"logits/chosen": -1.1873140335083008, |
|
"logits/rejected": -1.1317476034164429, |
|
"logps/chosen": -0.34768182039260864, |
|
"logps/rejected": -0.38563376665115356, |
|
"loss": 3.7644, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -3.476818561553955, |
|
"rewards/margins": 0.3795194625854492, |
|
"rewards/rejected": -3.856337785720825, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11756847027388109, |
|
"grad_norm": 77.86452279542212, |
|
"learning_rate": 9.991050648838675e-07, |
|
"logits/chosen": -1.2336595058441162, |
|
"logits/rejected": -1.1919711828231812, |
|
"logps/chosen": -0.33353298902511597, |
|
"logps/rejected": -0.3969067931175232, |
|
"loss": 3.6758, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -3.3353302478790283, |
|
"rewards/margins": 0.6337377429008484, |
|
"rewards/rejected": -3.9690680503845215, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1282565130260521, |
|
"grad_norm": 48.54307450778913, |
|
"learning_rate": 9.97637968732563e-07, |
|
"logits/chosen": -1.2853938341140747, |
|
"logits/rejected": -1.2468154430389404, |
|
"logps/chosen": -0.3357910215854645, |
|
"logps/rejected": -0.36223435401916504, |
|
"loss": 3.6487, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -3.35791015625, |
|
"rewards/margins": 0.26443368196487427, |
|
"rewards/rejected": -3.6223437786102295, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13894455577822312, |
|
"grad_norm": 67.81573981196492, |
|
"learning_rate": 9.954748808839674e-07, |
|
"logits/chosen": -1.203680396080017, |
|
"logits/rejected": -1.1713194847106934, |
|
"logps/chosen": -0.3974061906337738, |
|
"logps/rejected": -0.4787389636039734, |
|
"loss": 3.5226, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -3.9740614891052246, |
|
"rewards/margins": 0.813327431678772, |
|
"rewards/rejected": -4.787389278411865, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14963259853039412, |
|
"grad_norm": 43.70954674356485, |
|
"learning_rate": 9.926188266120295e-07, |
|
"logits/chosen": -1.2420989274978638, |
|
"logits/rejected": -1.211631178855896, |
|
"logps/chosen": -0.43263110518455505, |
|
"logps/rejected": -0.5376982688903809, |
|
"loss": 3.6077, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -4.3263115882873535, |
|
"rewards/margins": 1.0506718158721924, |
|
"rewards/rejected": -5.376982688903809, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16032064128256512, |
|
"grad_norm": 76.0561732944734, |
|
"learning_rate": 9.890738003669027e-07, |
|
"logits/chosen": -1.196466088294983, |
|
"logits/rejected": -1.1188781261444092, |
|
"logps/chosen": -0.4406164586544037, |
|
"logps/rejected": -0.5367096662521362, |
|
"loss": 3.5757, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -4.40616512298584, |
|
"rewards/margins": 0.9609323740005493, |
|
"rewards/rejected": -5.3670973777771, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17100868403473615, |
|
"grad_norm": 51.31544624480531, |
|
"learning_rate": 9.848447601883433e-07, |
|
"logits/chosen": -1.189353346824646, |
|
"logits/rejected": -1.1714979410171509, |
|
"logps/chosen": -0.38350600004196167, |
|
"logps/rejected": -0.5140829682350159, |
|
"loss": 3.4182, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -3.835059642791748, |
|
"rewards/margins": 1.3057689666748047, |
|
"rewards/rejected": -5.140829563140869, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18169672678690715, |
|
"grad_norm": 72.27870189202869, |
|
"learning_rate": 9.799376207714444e-07, |
|
"logits/chosen": -1.2069041728973389, |
|
"logits/rejected": -1.1819300651550293, |
|
"logps/chosen": -0.4050617218017578, |
|
"logps/rejected": -0.46999722719192505, |
|
"loss": 3.4683, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -4.05061674118042, |
|
"rewards/margins": 0.6493551731109619, |
|
"rewards/rejected": -4.699972152709961, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19238476953907815, |
|
"grad_norm": 65.15839777923942, |
|
"learning_rate": 9.743592451943998e-07, |
|
"logits/chosen": -1.2592518329620361, |
|
"logits/rejected": -1.2168278694152832, |
|
"logps/chosen": -0.49755024909973145, |
|
"logps/rejected": -0.6345678567886353, |
|
"loss": 3.5477, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -4.975502014160156, |
|
"rewards/margins": 1.3701757192611694, |
|
"rewards/rejected": -6.345677852630615, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20307281229124916, |
|
"grad_norm": 86.65912439673568, |
|
"learning_rate": 9.681174353198686e-07, |
|
"logits/chosen": -1.324328064918518, |
|
"logits/rejected": -1.2326462268829346, |
|
"logps/chosen": -0.5199041962623596, |
|
"logps/rejected": -0.5949305295944214, |
|
"loss": 3.3966, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -5.199041843414307, |
|
"rewards/margins": 0.7502638697624207, |
|
"rewards/rejected": -5.949306011199951, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21376085504342018, |
|
"grad_norm": 84.91017386381003, |
|
"learning_rate": 9.612209208833646e-07, |
|
"logits/chosen": -1.223847508430481, |
|
"logits/rejected": -1.193149209022522, |
|
"logps/chosen": -0.5218724012374878, |
|
"logps/rejected": -0.6197377443313599, |
|
"loss": 3.4071, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -5.218724250793457, |
|
"rewards/margins": 0.978653609752655, |
|
"rewards/rejected": -6.197378158569336, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22444889779559118, |
|
"grad_norm": 122.37976199382105, |
|
"learning_rate": 9.536793472839324e-07, |
|
"logits/chosen": -1.2392634153366089, |
|
"logits/rejected": -1.178882360458374, |
|
"logps/chosen": -0.47783392667770386, |
|
"logps/rejected": -0.6210560202598572, |
|
"loss": 3.3437, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -4.7783403396606445, |
|
"rewards/margins": 1.432220220565796, |
|
"rewards/rejected": -6.210559844970703, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23513694054776219, |
|
"grad_norm": 117.75268692874619, |
|
"learning_rate": 9.455032620941839e-07, |
|
"logits/chosen": -1.2091577053070068, |
|
"logits/rejected": -1.1453241109848022, |
|
"logps/chosen": -0.5471813678741455, |
|
"logps/rejected": -0.6967746615409851, |
|
"loss": 3.3036, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -5.471813678741455, |
|
"rewards/margins": 1.495932936668396, |
|
"rewards/rejected": -6.967746734619141, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2458249832999332, |
|
"grad_norm": 103.23266991280943, |
|
"learning_rate": 9.367041003085648e-07, |
|
"logits/chosen": -1.2898657321929932, |
|
"logits/rejected": -1.224526047706604, |
|
"logps/chosen": -0.5657516121864319, |
|
"logps/rejected": -0.684479296207428, |
|
"loss": 3.058, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -5.657515525817871, |
|
"rewards/margins": 1.18727707862854, |
|
"rewards/rejected": -6.84479284286499, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2565130260521042, |
|
"grad_norm": 87.5488547010946, |
|
"learning_rate": 9.272941683504808e-07, |
|
"logits/chosen": -1.2546017169952393, |
|
"logits/rejected": -1.157871961593628, |
|
"logps/chosen": -0.6318168640136719, |
|
"logps/rejected": -0.8914499282836914, |
|
"loss": 2.963, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -6.318168640136719, |
|
"rewards/margins": 2.596330165863037, |
|
"rewards/rejected": -8.914499282836914, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"grad_norm": 136.85902662680772, |
|
"learning_rate": 9.172866268606513e-07, |
|
"logits/chosen": -1.336275339126587, |
|
"logits/rejected": -1.2872272729873657, |
|
"logps/chosen": -0.6883447766304016, |
|
"logps/rejected": -0.8685702085494995, |
|
"loss": 2.782, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -6.883447170257568, |
|
"rewards/margins": 1.8022544384002686, |
|
"rewards/rejected": -8.685701370239258, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27788911155644624, |
|
"grad_norm": 219.78249066687138, |
|
"learning_rate": 9.066954722907638e-07, |
|
"logits/chosen": -1.365232229232788, |
|
"logits/rejected": -1.354945182800293, |
|
"logps/chosen": -0.795397162437439, |
|
"logps/rejected": -1.2445639371871948, |
|
"loss": 2.7364, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -7.953972816467285, |
|
"rewards/margins": 4.491666793823242, |
|
"rewards/rejected": -12.445638656616211, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28857715430861725, |
|
"grad_norm": 108.29484064425847, |
|
"learning_rate": 8.955355173281707e-07, |
|
"logits/chosen": -1.3360868692398071, |
|
"logits/rejected": -1.2848550081253052, |
|
"logps/chosen": -0.8393806219100952, |
|
"logps/rejected": -1.0930553674697876, |
|
"loss": 2.7236, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -8.393805503845215, |
|
"rewards/margins": 2.536748170852661, |
|
"rewards/rejected": -10.930554389953613, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29926519706078825, |
|
"grad_norm": 111.40981489221316, |
|
"learning_rate": 8.838223701790055e-07, |
|
"logits/chosen": -1.4011452198028564, |
|
"logits/rejected": -1.3746473789215088, |
|
"logps/chosen": -0.8652567863464355, |
|
"logps/rejected": -1.1113642454147339, |
|
"loss": 2.6512, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -8.652568817138672, |
|
"rewards/margins": 2.4610743522644043, |
|
"rewards/rejected": -11.113641738891602, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30995323981295925, |
|
"grad_norm": 138.17602287907837, |
|
"learning_rate": 8.71572412738697e-07, |
|
"logits/chosen": -1.3082659244537354, |
|
"logits/rejected": -1.2775059938430786, |
|
"logps/chosen": -0.8726702928543091, |
|
"logps/rejected": -1.1962165832519531, |
|
"loss": 2.5784, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -8.726702690124512, |
|
"rewards/margins": 3.2354626655578613, |
|
"rewards/rejected": -11.962165832519531, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32064128256513025, |
|
"grad_norm": 115.35023284841587, |
|
"learning_rate": 8.588027776804058e-07, |
|
"logits/chosen": -1.3314917087554932, |
|
"logits/rejected": -1.306792974472046, |
|
"logps/chosen": -1.0279521942138672, |
|
"logps/rejected": -1.3382768630981445, |
|
"loss": 2.47, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -10.279521942138672, |
|
"rewards/margins": 3.1032464504241943, |
|
"rewards/rejected": -13.382769584655762, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33132932531730125, |
|
"grad_norm": 144.67956167588085, |
|
"learning_rate": 8.455313244934324e-07, |
|
"logits/chosen": -1.3292877674102783, |
|
"logits/rejected": -1.3038713932037354, |
|
"logps/chosen": -1.035979986190796, |
|
"logps/rejected": -1.4305903911590576, |
|
"loss": 2.4435, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -10.359800338745117, |
|
"rewards/margins": 3.946104049682617, |
|
"rewards/rejected": -14.305903434753418, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3420173680694723, |
|
"grad_norm": 106.03488572932915, |
|
"learning_rate": 8.317766145051057e-07, |
|
"logits/chosen": -1.3299791812896729, |
|
"logits/rejected": -1.3083471059799194, |
|
"logps/chosen": -1.1912587881088257, |
|
"logps/rejected": -1.648496389389038, |
|
"loss": 2.3299, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -11.912586212158203, |
|
"rewards/margins": 4.5723772048950195, |
|
"rewards/rejected": -16.484966278076172, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3527054108216433, |
|
"grad_norm": 124.30842086260031, |
|
"learning_rate": 8.175578849210894e-07, |
|
"logits/chosen": -1.3360395431518555, |
|
"logits/rejected": -1.3087151050567627, |
|
"logps/chosen": -1.2806798219680786, |
|
"logps/rejected": -1.7463128566741943, |
|
"loss": 2.1935, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -12.806798934936523, |
|
"rewards/margins": 4.656330108642578, |
|
"rewards/rejected": -17.463130950927734, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3633934535738143, |
|
"grad_norm": 159.71509664144037, |
|
"learning_rate": 8.028950219204099e-07, |
|
"logits/chosen": -1.3390699625015259, |
|
"logits/rejected": -1.3161685466766357, |
|
"logps/chosen": -1.27903151512146, |
|
"logps/rejected": -1.8541208505630493, |
|
"loss": 2.1363, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -12.790315628051758, |
|
"rewards/margins": 5.750893592834473, |
|
"rewards/rejected": -18.541208267211914, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3740814963259853, |
|
"grad_norm": 163.13967408983922, |
|
"learning_rate": 7.878085328428368e-07, |
|
"logits/chosen": -1.359133243560791, |
|
"logits/rejected": -1.3104041814804077, |
|
"logps/chosen": -1.4678348302841187, |
|
"logps/rejected": -1.8786561489105225, |
|
"loss": 2.0099, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -14.678349494934082, |
|
"rewards/margins": 4.108211517333984, |
|
"rewards/rejected": -18.786561965942383, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3847695390781563, |
|
"grad_norm": 213.96398552844718, |
|
"learning_rate": 7.723195175075135e-07, |
|
"logits/chosen": -1.3180896043777466, |
|
"logits/rejected": -1.2993403673171997, |
|
"logps/chosen": -1.395212173461914, |
|
"logps/rejected": -1.9256620407104492, |
|
"loss": 1.8566, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -13.952122688293457, |
|
"rewards/margins": 5.304497718811035, |
|
"rewards/rejected": -19.25661849975586, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3954575818303273, |
|
"grad_norm": 151.287610930137, |
|
"learning_rate": 7.564496387029531e-07, |
|
"logits/chosen": -1.354866623878479, |
|
"logits/rejected": -1.2994146347045898, |
|
"logps/chosen": -1.5145620107650757, |
|
"logps/rejected": -2.0945541858673096, |
|
"loss": 1.9317, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.14561939239502, |
|
"rewards/margins": 5.799921989440918, |
|
"rewards/rejected": -20.945541381835938, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4061456245824983, |
|
"grad_norm": 147.29173146280465, |
|
"learning_rate": 7.402210918896689e-07, |
|
"logits/chosen": -1.348362684249878, |
|
"logits/rejected": -1.3590227365493774, |
|
"logps/chosen": -1.7428133487701416, |
|
"logps/rejected": -2.511070966720581, |
|
"loss": 1.7298, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -17.428131103515625, |
|
"rewards/margins": 7.682579040527344, |
|
"rewards/rejected": -25.1107120513916, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4168336673346693, |
|
"grad_norm": 107.1032490301315, |
|
"learning_rate": 7.236565741578162e-07, |
|
"logits/chosen": -1.3077692985534668, |
|
"logits/rejected": -1.2952277660369873, |
|
"logps/chosen": -1.8426529169082642, |
|
"logps/rejected": -2.392831325531006, |
|
"loss": 1.9538, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -18.426528930664062, |
|
"rewards/margins": 5.5017852783203125, |
|
"rewards/rejected": -23.928314208984375, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42752171008684037, |
|
"grad_norm": 190.9422038213508, |
|
"learning_rate": 7.067792524832603e-07, |
|
"logits/chosen": -1.3160083293914795, |
|
"logits/rejected": -1.3034440279006958, |
|
"logps/chosen": -1.7327241897583008, |
|
"logps/rejected": -2.3605778217315674, |
|
"loss": 1.6663, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -17.327241897583008, |
|
"rewards/margins": 6.278534889221191, |
|
"rewards/rejected": -23.605777740478516, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43820975283901137, |
|
"grad_norm": 161.95141010920645, |
|
"learning_rate": 6.896127313264642e-07, |
|
"logits/chosen": -1.3677047491073608, |
|
"logits/rejected": -1.3167794942855835, |
|
"logps/chosen": -1.8963912725448608, |
|
"logps/rejected": -2.526191234588623, |
|
"loss": 1.8748, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -18.96391487121582, |
|
"rewards/margins": 6.297998905181885, |
|
"rewards/rejected": -25.261911392211914, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44889779559118237, |
|
"grad_norm": 150.6462623725548, |
|
"learning_rate": 6.721810196195174e-07, |
|
"logits/chosen": -1.400187373161316, |
|
"logits/rejected": -1.3892953395843506, |
|
"logps/chosen": -2.041691541671753, |
|
"logps/rejected": -2.6361751556396484, |
|
"loss": 1.827, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -20.416913986206055, |
|
"rewards/margins": 5.9448370933532715, |
|
"rewards/rejected": -26.361751556396484, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45958583834335337, |
|
"grad_norm": 126.80315819769801, |
|
"learning_rate": 6.545084971874736e-07, |
|
"logits/chosen": -1.3723266124725342, |
|
"logits/rejected": -1.356567144393921, |
|
"logps/chosen": -2.0259296894073486, |
|
"logps/rejected": -2.754607677459717, |
|
"loss": 1.6874, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -20.259296417236328, |
|
"rewards/margins": 7.286777496337891, |
|
"rewards/rejected": -27.546072006225586, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.47027388109552437, |
|
"grad_norm": 161.97036694522683, |
|
"learning_rate": 6.3661988065096e-07, |
|
"logits/chosen": -1.4270765781402588, |
|
"logits/rejected": -1.4134238958358765, |
|
"logps/chosen": -2.0706095695495605, |
|
"logps/rejected": -2.8078479766845703, |
|
"loss": 1.5613, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -20.706096649169922, |
|
"rewards/margins": 7.372382164001465, |
|
"rewards/rejected": -28.078479766845703, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48096192384769537, |
|
"grad_norm": 119.50283654492473, |
|
"learning_rate": 6.185401888577487e-07, |
|
"logits/chosen": -1.4146029949188232, |
|
"logits/rejected": -1.3837422132492065, |
|
"logps/chosen": -2.119856595993042, |
|
"logps/rejected": -2.7663817405700684, |
|
"loss": 1.7069, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -21.198566436767578, |
|
"rewards/margins": 6.465248107910156, |
|
"rewards/rejected": -27.663818359375, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4916499665998664, |
|
"grad_norm": 109.77939760201329, |
|
"learning_rate": 6.002947078916364e-07, |
|
"logits/chosen": -1.4831273555755615, |
|
"logits/rejected": -1.435591459274292, |
|
"logps/chosen": -2.0505659580230713, |
|
"logps/rejected": -2.685992956161499, |
|
"loss": 1.5078, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -20.505659103393555, |
|
"rewards/margins": 6.354269027709961, |
|
"rewards/rejected": -26.859928131103516, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5023380093520374, |
|
"grad_norm": 147.37811459958783, |
|
"learning_rate": 5.819089557075688e-07, |
|
"logits/chosen": -1.4949449300765991, |
|
"logits/rejected": -1.469238042831421, |
|
"logps/chosen": -2.045539617538452, |
|
"logps/rejected": -2.816204071044922, |
|
"loss": 1.4911, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -20.455394744873047, |
|
"rewards/margins": 7.706644535064697, |
|
"rewards/rejected": -28.162038803100586, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5130260521042084, |
|
"grad_norm": 132.440512722152, |
|
"learning_rate": 5.634086464424742e-07, |
|
"logits/chosen": -1.461572289466858, |
|
"logits/rejected": -1.4638675451278687, |
|
"logps/chosen": -1.9458844661712646, |
|
"logps/rejected": -2.7108004093170166, |
|
"loss": 1.5773, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -19.458845138549805, |
|
"rewards/margins": 7.6491570472717285, |
|
"rewards/rejected": -27.10800552368164, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5237140948563794, |
|
"grad_norm": 236.09806301372015, |
|
"learning_rate": 5.448196544517167e-07, |
|
"logits/chosen": -1.5521364212036133, |
|
"logits/rejected": -1.4946634769439697, |
|
"logps/chosen": -2.0292551517486572, |
|
"logps/rejected": -2.9242515563964844, |
|
"loss": 1.395, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -20.292551040649414, |
|
"rewards/margins": 8.949962615966797, |
|
"rewards/rejected": -29.242517471313477, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"grad_norm": 142.79859396103615, |
|
"learning_rate": 5.26167978121472e-07, |
|
"logits/chosen": -1.4808663129806519, |
|
"logits/rejected": -1.4642443656921387, |
|
"logps/chosen": -2.046252489089966, |
|
"logps/rejected": -2.9118173122406006, |
|
"loss": 1.3738, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -20.462526321411133, |
|
"rewards/margins": 8.655647277832031, |
|
"rewards/rejected": -29.1181697845459, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5450901803607214, |
|
"grad_norm": 155.0378761351227, |
|
"learning_rate": 5.074797035076318e-07, |
|
"logits/chosen": -1.5233964920043945, |
|
"logits/rejected": -1.5014541149139404, |
|
"logps/chosen": -2.2131590843200684, |
|
"logps/rejected": -2.9324965476989746, |
|
"loss": 1.5353, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -22.131589889526367, |
|
"rewards/margins": 7.193373680114746, |
|
"rewards/rejected": -29.324962615966797, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5557782231128925, |
|
"grad_norm": 130.04315061980418, |
|
"learning_rate": 4.887809678520975e-07, |
|
"logits/chosen": -1.5009315013885498, |
|
"logits/rejected": -1.474130630493164, |
|
"logps/chosen": -2.1018893718719482, |
|
"logps/rejected": -2.8382973670959473, |
|
"loss": 1.5292, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -21.018896102905273, |
|
"rewards/margins": 7.364079475402832, |
|
"rewards/rejected": -28.382976531982422, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5664662658650634, |
|
"grad_norm": 119.27303382130957, |
|
"learning_rate": 4.700979230274829e-07, |
|
"logits/chosen": -1.4742642641067505, |
|
"logits/rejected": -1.4596500396728516, |
|
"logps/chosen": -2.2155094146728516, |
|
"logps/rejected": -3.0179553031921387, |
|
"loss": 1.1693, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -22.15509605407715, |
|
"rewards/margins": 8.024459838867188, |
|
"rewards/rejected": -30.179553985595703, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5771543086172345, |
|
"grad_norm": 131.97372035922703, |
|
"learning_rate": 4.514566989613559e-07, |
|
"logits/chosen": -1.4763275384902954, |
|
"logits/rejected": -1.4486229419708252, |
|
"logps/chosen": -1.990666151046753, |
|
"logps/rejected": -2.838904857635498, |
|
"loss": 1.3013, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -19.906661987304688, |
|
"rewards/margins": 8.482388496398926, |
|
"rewards/rejected": -28.389049530029297, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5878423513694054, |
|
"grad_norm": 104.56387348722761, |
|
"learning_rate": 4.328833670911724e-07, |
|
"logits/chosen": -1.4566619396209717, |
|
"logits/rejected": -1.4216809272766113, |
|
"logps/chosen": -1.9770195484161377, |
|
"logps/rejected": -2.719089984893799, |
|
"loss": 1.4258, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -19.77019500732422, |
|
"rewards/margins": 7.4207048416137695, |
|
"rewards/rejected": -27.190898895263672, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5985303941215765, |
|
"grad_norm": 133.9300472658157, |
|
"learning_rate": 4.144039039010124e-07, |
|
"logits/chosen": -1.542373776435852, |
|
"logits/rejected": -1.5213029384613037, |
|
"logps/chosen": -2.0355706214904785, |
|
"logps/rejected": -2.883401393890381, |
|
"loss": 1.3022, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -20.3557071685791, |
|
"rewards/margins": 8.478304862976074, |
|
"rewards/rejected": -28.834014892578125, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6092184368737475, |
|
"grad_norm": 118.2550806898935, |
|
"learning_rate": 3.960441545911204e-07, |
|
"logits/chosen": -1.501997947692871, |
|
"logits/rejected": -1.4718389511108398, |
|
"logps/chosen": -2.043879270553589, |
|
"logps/rejected": -2.9162051677703857, |
|
"loss": 1.0025, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -20.438793182373047, |
|
"rewards/margins": 8.723258972167969, |
|
"rewards/rejected": -29.16205406188965, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6199064796259185, |
|
"grad_norm": 112.08802157558269, |
|
"learning_rate": 3.778297969310529e-07, |
|
"logits/chosen": -1.5190720558166504, |
|
"logits/rejected": -1.4788782596588135, |
|
"logps/chosen": -2.014256000518799, |
|
"logps/rejected": -2.7785563468933105, |
|
"loss": 1.3157, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -20.142559051513672, |
|
"rewards/margins": 7.64300537109375, |
|
"rewards/rejected": -27.785564422607422, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6305945223780896, |
|
"grad_norm": 122.16012718306597, |
|
"learning_rate": 3.5978630534699865e-07, |
|
"logits/chosen": -1.4519203901290894, |
|
"logits/rejected": -1.4377477169036865, |
|
"logps/chosen": -2.052429437637329, |
|
"logps/rejected": -2.7946228981018066, |
|
"loss": 1.2035, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -20.524295806884766, |
|
"rewards/margins": 7.421932220458984, |
|
"rewards/rejected": -27.94622802734375, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"grad_norm": 127.59960775053135, |
|
"learning_rate": 3.4193891529348795e-07, |
|
"logits/chosen": -1.3882105350494385, |
|
"logits/rejected": -1.365216612815857, |
|
"logps/chosen": -2.0868842601776123, |
|
"logps/rejected": -2.7921385765075684, |
|
"loss": 1.5608, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -20.86884307861328, |
|
"rewards/margins": 7.0525383949279785, |
|
"rewards/rejected": -27.9213809967041, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6519706078824316, |
|
"grad_norm": 104.5591334100815, |
|
"learning_rate": 3.243125879593286e-07, |
|
"logits/chosen": -1.5032074451446533, |
|
"logits/rejected": -1.458888053894043, |
|
"logps/chosen": -2.047602415084839, |
|
"logps/rejected": -2.731414794921875, |
|
"loss": 1.3552, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -20.476024627685547, |
|
"rewards/margins": 6.8381242752075195, |
|
"rewards/rejected": -27.31414794921875, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6626586506346025, |
|
"grad_norm": 128.9435075925882, |
|
"learning_rate": 3.069319753571269e-07, |
|
"logits/chosen": -1.5344185829162598, |
|
"logits/rejected": -1.520206093788147, |
|
"logps/chosen": -2.182490587234497, |
|
"logps/rejected": -2.994981288909912, |
|
"loss": 1.3424, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -21.824905395507812, |
|
"rewards/margins": 8.124906539916992, |
|
"rewards/rejected": -29.949810028076172, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6733466933867736, |
|
"grad_norm": 117.862174484777, |
|
"learning_rate": 2.898213858452173e-07, |
|
"logits/chosen": -1.5316812992095947, |
|
"logits/rejected": -1.4777030944824219, |
|
"logps/chosen": -2.1463074684143066, |
|
"logps/rejected": -2.8947274684906006, |
|
"loss": 1.2773, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -21.46307373046875, |
|
"rewards/margins": 7.484200954437256, |
|
"rewards/rejected": -28.947277069091797, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6840347361389446, |
|
"grad_norm": 154.98050630680447, |
|
"learning_rate": 2.730047501302266e-07, |
|
"logits/chosen": -1.5196678638458252, |
|
"logits/rejected": -1.5156781673431396, |
|
"logps/chosen": -2.249702215194702, |
|
"logps/rejected": -3.2510273456573486, |
|
"loss": 1.0665, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -22.497024536132812, |
|
"rewards/margins": 10.013248443603516, |
|
"rewards/rejected": -32.51027297973633, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6947227788911156, |
|
"grad_norm": 180.45567365921923, |
|
"learning_rate": 2.5650558779781635e-07, |
|
"logits/chosen": -1.549286127090454, |
|
"logits/rejected": -1.5002086162567139, |
|
"logps/chosen": -2.399672031402588, |
|
"logps/rejected": -3.504227876663208, |
|
"loss": 1.2384, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -23.996719360351562, |
|
"rewards/margins": 11.045561790466309, |
|
"rewards/rejected": -35.04228210449219, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7054108216432866, |
|
"grad_norm": 105.90784807533655, |
|
"learning_rate": 2.403469744184154e-07, |
|
"logits/chosen": -1.4591710567474365, |
|
"logits/rejected": -1.4205106496810913, |
|
"logps/chosen": -2.2293872833251953, |
|
"logps/rejected": -3.00614857673645, |
|
"loss": 1.2359, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -22.293874740600586, |
|
"rewards/margins": 7.767613410949707, |
|
"rewards/rejected": -30.061487197875977, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7160988643954576, |
|
"grad_norm": 124.3214763138239, |
|
"learning_rate": 2.2455150927394878e-07, |
|
"logits/chosen": -1.495527744293213, |
|
"logits/rejected": -1.4803019762039185, |
|
"logps/chosen": -2.202894449234009, |
|
"logps/rejected": -3.0441994667053223, |
|
"loss": 1.1359, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -22.02894401550293, |
|
"rewards/margins": 8.413049697875977, |
|
"rewards/rejected": -30.441997528076172, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7267869071476286, |
|
"grad_norm": 156.03049853633283, |
|
"learning_rate": 2.0914128375069722e-07, |
|
"logits/chosen": -1.5111547708511353, |
|
"logits/rejected": -1.4789291620254517, |
|
"logps/chosen": -2.1601741313934326, |
|
"logps/rejected": -3.021275281906128, |
|
"loss": 1.2419, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -21.601741790771484, |
|
"rewards/margins": 8.611013412475586, |
|
"rewards/rejected": -30.212757110595703, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7374749498997996, |
|
"grad_norm": 130.76746865900543, |
|
"learning_rate": 1.9413785044249676e-07, |
|
"logits/chosen": -1.5371043682098389, |
|
"logits/rejected": -1.5177440643310547, |
|
"logps/chosen": -2.2243714332580566, |
|
"logps/rejected": -3.2498092651367188, |
|
"loss": 1.2299, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -22.243711471557617, |
|
"rewards/margins": 10.25438117980957, |
|
"rewards/rejected": -32.49809265136719, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7481629926519706, |
|
"grad_norm": 219.49500818808932, |
|
"learning_rate": 1.7956219300748792e-07, |
|
"logits/chosen": -1.5225059986114502, |
|
"logits/rejected": -1.5286823511123657, |
|
"logps/chosen": -2.12577486038208, |
|
"logps/rejected": -3.068633794784546, |
|
"loss": 1.164, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -21.257749557495117, |
|
"rewards/margins": 9.428587913513184, |
|
"rewards/rejected": -30.686336517333984, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7588510354041417, |
|
"grad_norm": 148.30498812771748, |
|
"learning_rate": 1.6543469682057104e-07, |
|
"logits/chosen": -1.4495574235916138, |
|
"logits/rejected": -1.4652235507965088, |
|
"logps/chosen": -2.066810131072998, |
|
"logps/rejected": -2.938152313232422, |
|
"loss": 1.1351, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -20.668102264404297, |
|
"rewards/margins": 8.713422775268555, |
|
"rewards/rejected": -29.38152503967285, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7695390781563126, |
|
"grad_norm": 116.32905868645479, |
|
"learning_rate": 1.5177512046261666e-07, |
|
"logits/chosen": -1.5009536743164062, |
|
"logits/rejected": -1.5022971630096436, |
|
"logps/chosen": -2.0494394302368164, |
|
"logps/rejected": -3.0328898429870605, |
|
"loss": 1.1972, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -20.494396209716797, |
|
"rewards/margins": 9.834505081176758, |
|
"rewards/rejected": -30.328899383544922, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7802271209084837, |
|
"grad_norm": 108.7134031593527, |
|
"learning_rate": 1.3860256808630427e-07, |
|
"logits/chosen": -1.5464454889297485, |
|
"logits/rejected": -1.4812748432159424, |
|
"logps/chosen": -2.1187500953674316, |
|
"logps/rejected": -3.0532031059265137, |
|
"loss": 1.253, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -21.187496185302734, |
|
"rewards/margins": 9.344534873962402, |
|
"rewards/rejected": -30.532033920288086, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7909151636606546, |
|
"grad_norm": 145.2134549922322, |
|
"learning_rate": 1.2593546269723647e-07, |
|
"logits/chosen": -1.474498987197876, |
|
"logits/rejected": -1.4640263319015503, |
|
"logps/chosen": -2.100693941116333, |
|
"logps/rejected": -2.9064152240753174, |
|
"loss": 1.1015, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -21.00693702697754, |
|
"rewards/margins": 8.05721378326416, |
|
"rewards/rejected": -29.064151763916016, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"grad_norm": 130.4190340462438, |
|
"learning_rate": 1.1379152038770029e-07, |
|
"logits/chosen": -1.4943348169326782, |
|
"logits/rejected": -1.4995317459106445, |
|
"logps/chosen": -2.2358219623565674, |
|
"logps/rejected": -3.1511778831481934, |
|
"loss": 1.1776, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -22.35822105407715, |
|
"rewards/margins": 9.153559684753418, |
|
"rewards/rejected": -31.51177978515625, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8122912491649966, |
|
"grad_norm": 171.4769450829484, |
|
"learning_rate": 1.0218772555910954e-07, |
|
"logits/chosen": -1.5206212997436523, |
|
"logits/rejected": -1.5025417804718018, |
|
"logps/chosen": -2.1182684898376465, |
|
"logps/rejected": -2.9665935039520264, |
|
"loss": 1.2678, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -21.18268394470215, |
|
"rewards/margins": 8.48325252532959, |
|
"rewards/rejected": -29.665935516357422, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8229792919171677, |
|
"grad_norm": 116.74061042296164, |
|
"learning_rate": 9.114030716778432e-08, |
|
"logits/chosen": -1.5152442455291748, |
|
"logits/rejected": -1.4944274425506592, |
|
"logps/chosen": -2.0695395469665527, |
|
"logps/rejected": -3.1040778160095215, |
|
"loss": 1.0078, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -20.69539451599121, |
|
"rewards/margins": 10.345380783081055, |
|
"rewards/rejected": -31.0407772064209, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8336673346693386, |
|
"grad_norm": 112.33137236518094, |
|
"learning_rate": 8.066471602728803e-08, |
|
"logits/chosen": -1.5178992748260498, |
|
"logits/rejected": -1.5032987594604492, |
|
"logps/chosen": -2.170666456222534, |
|
"logps/rejected": -3.162553310394287, |
|
"loss": 1.2224, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -21.7066650390625, |
|
"rewards/margins": 9.918868064880371, |
|
"rewards/rejected": -31.62552833557129, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8443553774215097, |
|
"grad_norm": 186.4853149645633, |
|
"learning_rate": 7.077560319906694e-08, |
|
"logits/chosen": -1.5229237079620361, |
|
"logits/rejected": -1.505030632019043, |
|
"logps/chosen": -2.0735864639282227, |
|
"logps/rejected": -2.9833900928497314, |
|
"loss": 1.1526, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -20.73586654663086, |
|
"rewards/margins": 9.098031997680664, |
|
"rewards/rejected": -29.833898544311523, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"grad_norm": 96.6762011696182, |
|
"learning_rate": 6.148679950161672e-08, |
|
"logits/chosen": -1.5230014324188232, |
|
"logits/rejected": -1.5108994245529175, |
|
"logps/chosen": -2.109778642654419, |
|
"logps/rejected": -2.941293716430664, |
|
"loss": 1.1544, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -21.09778594970703, |
|
"rewards/margins": 8.315154075622559, |
|
"rewards/rejected": -29.412939071655273, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"eval_logits/chosen": -1.710108757019043, |
|
"eval_logits/rejected": -1.7197374105453491, |
|
"eval_logps/chosen": -2.08453369140625, |
|
"eval_logps/rejected": -2.94063138961792, |
|
"eval_loss": 1.1388919353485107, |
|
"eval_rewards/accuracies": 0.8678861856460571, |
|
"eval_rewards/chosen": -20.845340728759766, |
|
"eval_rewards/margins": 8.560976028442383, |
|
"eval_rewards/rejected": -29.406312942504883, |
|
"eval_runtime": 123.1178, |
|
"eval_samples_per_second": 15.928, |
|
"eval_steps_per_second": 0.999, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8657314629258517, |
|
"grad_norm": 143.6008976778444, |
|
"learning_rate": 5.2811296166831666e-08, |
|
"logits/chosen": -1.4884498119354248, |
|
"logits/rejected": -1.508371114730835, |
|
"logps/chosen": -2.2530012130737305, |
|
"logps/rejected": -3.071047782897949, |
|
"loss": 1.1786, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -22.530010223388672, |
|
"rewards/margins": 8.18046760559082, |
|
"rewards/rejected": -30.710479736328125, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8764195056780227, |
|
"grad_norm": 145.0618422989251, |
|
"learning_rate": 4.4761226670592066e-08, |
|
"logits/chosen": -1.4991635084152222, |
|
"logits/rejected": -1.4899944067001343, |
|
"logps/chosen": -2.1319069862365723, |
|
"logps/rejected": -2.977902889251709, |
|
"loss": 1.073, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -21.31907081604004, |
|
"rewards/margins": 8.4599609375, |
|
"rewards/rejected": -29.77903175354004, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8871075484301937, |
|
"grad_norm": 121.04879854574509, |
|
"learning_rate": 3.734784976300165e-08, |
|
"logits/chosen": -1.5042963027954102, |
|
"logits/rejected": -1.4524286985397339, |
|
"logps/chosen": -2.0573108196258545, |
|
"logps/rejected": -3.0475449562072754, |
|
"loss": 1.2613, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -20.573108673095703, |
|
"rewards/margins": 9.902341842651367, |
|
"rewards/rejected": -30.475452423095703, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8977955911823647, |
|
"grad_norm": 139.53176945804844, |
|
"learning_rate": 3.058153372200695e-08, |
|
"logits/chosen": -1.5310368537902832, |
|
"logits/rejected": -1.4835598468780518, |
|
"logps/chosen": -2.005772352218628, |
|
"logps/rejected": -3.0218088626861572, |
|
"loss": 1.1364, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -20.057723999023438, |
|
"rewards/margins": 10.160362243652344, |
|
"rewards/rejected": -30.218088150024414, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9084836339345357, |
|
"grad_norm": 136.53214771188803, |
|
"learning_rate": 2.4471741852423233e-08, |
|
"logits/chosen": -1.525810718536377, |
|
"logits/rejected": -1.51809823513031, |
|
"logps/chosen": -2.186115026473999, |
|
"logps/rejected": -3.016331911087036, |
|
"loss": 1.4659, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -21.86115074157715, |
|
"rewards/margins": 8.302164077758789, |
|
"rewards/rejected": -30.163314819335938, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9191716766867067, |
|
"grad_norm": 159.75999561219473, |
|
"learning_rate": 1.9027019250647036e-08, |
|
"logits/chosen": -1.5137279033660889, |
|
"logits/rejected": -1.5020883083343506, |
|
"logps/chosen": -2.2320022583007812, |
|
"logps/rejected": -3.141892671585083, |
|
"loss": 1.1123, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -22.320022583007812, |
|
"rewards/margins": 9.098905563354492, |
|
"rewards/rejected": -31.418926239013672, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9298597194388778, |
|
"grad_norm": 103.73846522782931, |
|
"learning_rate": 1.4254980853566246e-08, |
|
"logits/chosen": -1.469313383102417, |
|
"logits/rejected": -1.4278925657272339, |
|
"logps/chosen": -1.988031029701233, |
|
"logps/rejected": -2.880598545074463, |
|
"loss": 1.1778, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -19.880308151245117, |
|
"rewards/margins": 8.925679206848145, |
|
"rewards/rejected": -28.805988311767578, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9405477621910487, |
|
"grad_norm": 103.8822744213963, |
|
"learning_rate": 1.016230078838226e-08, |
|
"logits/chosen": -1.4933596849441528, |
|
"logits/rejected": -1.4389324188232422, |
|
"logps/chosen": -2.1481704711914062, |
|
"logps/rejected": -3.0063669681549072, |
|
"loss": 1.0981, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -21.481706619262695, |
|
"rewards/margins": 8.581964492797852, |
|
"rewards/rejected": -30.063669204711914, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9512358049432198, |
|
"grad_norm": 105.17668873683024, |
|
"learning_rate": 6.754703038239329e-09, |
|
"logits/chosen": -1.4460530281066895, |
|
"logits/rejected": -1.431138277053833, |
|
"logps/chosen": -2.159626007080078, |
|
"logps/rejected": -3.2153289318084717, |
|
"loss": 1.1378, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -21.59626007080078, |
|
"rewards/margins": 10.557029724121094, |
|
"rewards/rejected": -32.153289794921875, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9619238476953907, |
|
"grad_norm": 149.56429045938268, |
|
"learning_rate": 4.036953436716895e-09, |
|
"logits/chosen": -1.5528205633163452, |
|
"logits/rejected": -1.5331635475158691, |
|
"logps/chosen": -2.0413358211517334, |
|
"logps/rejected": -2.8829147815704346, |
|
"loss": 1.1398, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -20.413360595703125, |
|
"rewards/margins": 8.415786743164062, |
|
"rewards/rejected": -28.829147338867188, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9726118904475618, |
|
"grad_norm": 144.02520089734548, |
|
"learning_rate": 2.0128530023804656e-09, |
|
"logits/chosen": -1.5203301906585693, |
|
"logits/rejected": -1.4858264923095703, |
|
"logps/chosen": -2.0674800872802734, |
|
"logps/rejected": -3.0988433361053467, |
|
"loss": 1.0231, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -20.674800872802734, |
|
"rewards/margins": 10.313634872436523, |
|
"rewards/rejected": -30.988433837890625, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9832999331997327, |
|
"grad_norm": 123.56718367330731, |
|
"learning_rate": 6.852326227130833e-10, |
|
"logits/chosen": -1.5208638906478882, |
|
"logits/rejected": -1.5143053531646729, |
|
"logps/chosen": -2.192969799041748, |
|
"logps/rejected": -3.1692872047424316, |
|
"loss": 1.1094, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -21.929697036743164, |
|
"rewards/margins": 9.763177871704102, |
|
"rewards/rejected": -31.692874908447266, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9939879759519038, |
|
"grad_norm": 128.9716796420016, |
|
"learning_rate": 5.594909486328348e-11, |
|
"logits/chosen": -1.4912371635437012, |
|
"logits/rejected": -1.496371865272522, |
|
"logps/chosen": -2.177973508834839, |
|
"logps/rejected": -3.1520798206329346, |
|
"loss": 1.2437, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -21.779735565185547, |
|
"rewards/margins": 9.741061210632324, |
|
"rewards/rejected": -31.520797729492188, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9982631930527722, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 2.0977548006004643, |
|
"train_runtime": 12789.999, |
|
"train_samples_per_second": 4.681, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|