|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 1911, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005232862375719519, |
|
"grad_norm": 8.49905799860688, |
|
"learning_rate": 2.6041666666666664e-09, |
|
"logits/chosen": -3.3605234622955322, |
|
"logits/rejected": -3.29974365234375, |
|
"logps/chosen": -511.38861083984375, |
|
"logps/rejected": -608.7561645507812, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0052328623757195184, |
|
"grad_norm": 7.961645909442391, |
|
"learning_rate": 2.6041666666666667e-08, |
|
"logits/chosen": -2.7466201782226562, |
|
"logits/rejected": -2.7476673126220703, |
|
"logps/chosen": -345.8751525878906, |
|
"logps/rejected": -288.6936340332031, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.0007061759824864566, |
|
"rewards/margins": -0.00037131065619178116, |
|
"rewards/rejected": 0.0010774866677820683, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010465724751439037, |
|
"grad_norm": 7.72531912443216, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -2.754342555999756, |
|
"logits/rejected": -2.746344804763794, |
|
"logps/chosen": -234.4208526611328, |
|
"logps/rejected": -222.6403350830078, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0003822968283202499, |
|
"rewards/margins": -0.00010337786807212979, |
|
"rewards/rejected": -0.0002789190039038658, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015698587127158554, |
|
"grad_norm": 7.586034767688272, |
|
"learning_rate": 7.812499999999999e-08, |
|
"logits/chosen": -2.6118738651275635, |
|
"logits/rejected": -2.587536573410034, |
|
"logps/chosen": -311.59722900390625, |
|
"logps/rejected": -283.9275817871094, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00048152971430681646, |
|
"rewards/margins": 0.00014012036263011396, |
|
"rewards/rejected": -0.0006216500769369304, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 7.914311184926244, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -2.8673393726348877, |
|
"logits/rejected": -2.7446229457855225, |
|
"logps/chosen": -327.90374755859375, |
|
"logps/rejected": -314.3080139160156, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.001243409002199769, |
|
"rewards/margins": 0.0004669098125305027, |
|
"rewards/rejected": 0.0007764992187730968, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.026164311878597593, |
|
"grad_norm": 8.422463968801786, |
|
"learning_rate": 1.3020833333333334e-07, |
|
"logits/chosen": -2.904254674911499, |
|
"logits/rejected": -2.747178554534912, |
|
"logps/chosen": -304.30133056640625, |
|
"logps/rejected": -276.2442321777344, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0010366481728851795, |
|
"rewards/margins": 0.00036621716571971774, |
|
"rewards/rejected": -0.0014028652803972363, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 8.249557143178746, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -2.760357618331909, |
|
"logits/rejected": -2.7226293087005615, |
|
"logps/chosen": -281.81414794921875, |
|
"logps/rejected": -275.4984130859375, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.001375377643853426, |
|
"rewards/margins": -0.0004821674956474453, |
|
"rewards/rejected": -0.0008932100608944893, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03663003663003663, |
|
"grad_norm": 8.688716344440326, |
|
"learning_rate": 1.8229166666666666e-07, |
|
"logits/chosen": -2.8751180171966553, |
|
"logits/rejected": -2.776615619659424, |
|
"logps/chosen": -340.36224365234375, |
|
"logps/rejected": -273.48846435546875, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0014231946552172303, |
|
"rewards/margins": 0.0012296558124944568, |
|
"rewards/rejected": 0.0001935386680997908, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 8.081517631588872, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -2.815150022506714, |
|
"logits/rejected": -2.7864503860473633, |
|
"logps/chosen": -316.01617431640625, |
|
"logps/rejected": -313.4715270996094, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.001057374058291316, |
|
"rewards/margins": -0.00040586115210317075, |
|
"rewards/rejected": -0.0006515128770843148, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04709576138147567, |
|
"grad_norm": 7.21366712010598, |
|
"learning_rate": 2.3437499999999998e-07, |
|
"logits/chosen": -2.818197727203369, |
|
"logits/rejected": -2.769399404525757, |
|
"logps/chosen": -253.9597930908203, |
|
"logps/rejected": -221.6929931640625, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0025865144561976194, |
|
"rewards/margins": 0.003298326628282666, |
|
"rewards/rejected": -0.000711812695953995, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.052328623757195186, |
|
"grad_norm": 7.290100558498015, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -2.8044304847717285, |
|
"logits/rejected": -2.6773159503936768, |
|
"logps/chosen": -229.00259399414062, |
|
"logps/rejected": -204.1415252685547, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0013053982984274626, |
|
"rewards/margins": 0.002726849401369691, |
|
"rewards/rejected": -0.0014214512193575501, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0575614861329147, |
|
"grad_norm": 7.806931649126581, |
|
"learning_rate": 2.864583333333333e-07, |
|
"logits/chosen": -2.7332875728607178, |
|
"logits/rejected": -2.597778797149658, |
|
"logps/chosen": -270.92083740234375, |
|
"logps/rejected": -204.1400909423828, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.00032228074269369245, |
|
"rewards/margins": 0.003660772694274783, |
|
"rewards/rejected": -0.0033384915441274643, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 7.62049117819167, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.8737130165100098, |
|
"logits/rejected": -2.8692593574523926, |
|
"logps/chosen": -380.74578857421875, |
|
"logps/rejected": -352.65606689453125, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0035394534934312105, |
|
"rewards/margins": 0.0011222701286897063, |
|
"rewards/rejected": 0.0024171832483261824, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06802721088435375, |
|
"grad_norm": 7.860021138207594, |
|
"learning_rate": 3.3854166666666667e-07, |
|
"logits/chosen": -2.713139057159424, |
|
"logits/rejected": -2.6882357597351074, |
|
"logps/chosen": -235.46713256835938, |
|
"logps/rejected": -246.5643768310547, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.004117549862712622, |
|
"rewards/margins": 0.011698728427290916, |
|
"rewards/rejected": -0.007581179030239582, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07326007326007326, |
|
"grad_norm": 7.725076203206082, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -2.5985608100891113, |
|
"logits/rejected": -2.630059003829956, |
|
"logps/chosen": -307.1208190917969, |
|
"logps/rejected": -318.78302001953125, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.008680115453898907, |
|
"rewards/margins": 0.01735403761267662, |
|
"rewards/rejected": -0.008673924021422863, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07849293563579278, |
|
"grad_norm": 8.282282371803712, |
|
"learning_rate": 3.9062499999999997e-07, |
|
"logits/chosen": -2.8361477851867676, |
|
"logits/rejected": -2.712515354156494, |
|
"logps/chosen": -272.6867370605469, |
|
"logps/rejected": -254.824462890625, |
|
"loss": 0.6869, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0017812151927500963, |
|
"rewards/margins": -0.0002715282025747001, |
|
"rewards/rejected": -0.001509687164798379, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 8.11583930483043, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -2.823399066925049, |
|
"logits/rejected": -2.663353443145752, |
|
"logps/chosen": -273.3413391113281, |
|
"logps/rejected": -241.88015747070312, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.008436222560703754, |
|
"rewards/margins": 0.016978086903691292, |
|
"rewards/rejected": -0.008541865274310112, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08895866038723181, |
|
"grad_norm": 8.8374110901132, |
|
"learning_rate": 4.427083333333333e-07, |
|
"logits/chosen": -2.767087459564209, |
|
"logits/rejected": -2.775615692138672, |
|
"logps/chosen": -246.1128387451172, |
|
"logps/rejected": -278.6310729980469, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.008895987644791603, |
|
"rewards/margins": 0.013028806075453758, |
|
"rewards/rejected": -0.004132818430662155, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 6.680052156414976, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -2.7818989753723145, |
|
"logits/rejected": -2.642585515975952, |
|
"logps/chosen": -302.69256591796875, |
|
"logps/rejected": -277.1615295410156, |
|
"loss": 0.6851, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.015633350238204002, |
|
"rewards/margins": 0.014146494679152966, |
|
"rewards/rejected": -0.029779845848679543, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09942438513867086, |
|
"grad_norm": 8.714167716479949, |
|
"learning_rate": 4.947916666666667e-07, |
|
"logits/chosen": -2.8037378787994385, |
|
"logits/rejected": -2.6529879570007324, |
|
"logps/chosen": -314.17999267578125, |
|
"logps/rejected": -266.3314208984375, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.007848030887544155, |
|
"rewards/margins": 0.03183719515800476, |
|
"rewards/rejected": -0.023989161476492882, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 7.272360268876378, |
|
"learning_rate": 4.999732803821339e-07, |
|
"logits/chosen": -2.6897873878479004, |
|
"logits/rejected": -2.5882019996643066, |
|
"logps/chosen": -281.3874206542969, |
|
"logps/rejected": -309.0589904785156, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.004982407204806805, |
|
"rewards/margins": 0.03435878828167915, |
|
"rewards/rejected": -0.03934119641780853, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"grad_norm": 8.004963774369166, |
|
"learning_rate": 4.998647417232375e-07, |
|
"logits/chosen": -2.7537169456481934, |
|
"logits/rejected": -2.656438112258911, |
|
"logps/chosen": -219.36074829101562, |
|
"logps/rejected": -203.29373168945312, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0006419006967917085, |
|
"rewards/margins": 0.00899927131831646, |
|
"rewards/rejected": -0.008357370272278786, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1151229722658294, |
|
"grad_norm": 7.166614652492712, |
|
"learning_rate": 4.996727502703357e-07, |
|
"logits/chosen": -2.801790952682495, |
|
"logits/rejected": -2.7394392490386963, |
|
"logps/chosen": -288.6463317871094, |
|
"logps/rejected": -255.1267547607422, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.01907144859433174, |
|
"rewards/margins": 0.05745015665888786, |
|
"rewards/rejected": -0.03837870433926582, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12035583464154893, |
|
"grad_norm": 7.484085138892691, |
|
"learning_rate": 4.993973701470142e-07, |
|
"logits/chosen": -2.807175397872925, |
|
"logits/rejected": -2.7937538623809814, |
|
"logps/chosen": -252.76950073242188, |
|
"logps/rejected": -337.986328125, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.008308514021337032, |
|
"rewards/margins": 0.05279649421572685, |
|
"rewards/rejected": -0.044487982988357544, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 8.092657592692094, |
|
"learning_rate": 4.990386933279972e-07, |
|
"logits/chosen": -2.7619621753692627, |
|
"logits/rejected": -2.6436007022857666, |
|
"logps/chosen": -241.7710723876953, |
|
"logps/rejected": -229.43887329101562, |
|
"loss": 0.6674, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.02687176689505577, |
|
"rewards/margins": 0.0386468879878521, |
|
"rewards/rejected": -0.06551866233348846, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13082155939298795, |
|
"grad_norm": 8.037414680932423, |
|
"learning_rate": 4.985968396084284e-07, |
|
"logits/chosen": -2.663861036300659, |
|
"logits/rejected": -2.6479439735412598, |
|
"logps/chosen": -301.2739562988281, |
|
"logps/rejected": -263.560546875, |
|
"loss": 0.6652, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.02025497518479824, |
|
"rewards/margins": 0.07919565588235855, |
|
"rewards/rejected": -0.09945062547922134, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 9.07391917856823, |
|
"learning_rate": 4.98071956563861e-07, |
|
"logits/chosen": -2.7942392826080322, |
|
"logits/rejected": -2.691336154937744, |
|
"logps/chosen": -301.15655517578125, |
|
"logps/rejected": -285.9658508300781, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.003276229603216052, |
|
"rewards/margins": 0.09684257209300995, |
|
"rewards/rejected": -0.09356634318828583, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.141287284144427, |
|
"grad_norm": 7.034059586795834, |
|
"learning_rate": 4.97464219500968e-07, |
|
"logits/chosen": -2.738903045654297, |
|
"logits/rejected": -2.6665232181549072, |
|
"logps/chosen": -286.79241943359375, |
|
"logps/rejected": -288.70782470703125, |
|
"loss": 0.6518, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0593612901866436, |
|
"rewards/margins": 0.03153805062174797, |
|
"rewards/rejected": -0.09089933335781097, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 8.44862913080212, |
|
"learning_rate": 4.967738313989918e-07, |
|
"logits/chosen": -2.6916403770446777, |
|
"logits/rejected": -2.7087717056274414, |
|
"logps/chosen": -316.4515075683594, |
|
"logps/rejected": -306.18597412109375, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.04051298648118973, |
|
"rewards/margins": 0.08778198063373566, |
|
"rewards/rejected": -0.12829497456550598, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15175300889586604, |
|
"grad_norm": 7.534020396192998, |
|
"learning_rate": 4.960010228419499e-07, |
|
"logits/chosen": -2.847952127456665, |
|
"logits/rejected": -2.678166151046753, |
|
"logps/chosen": -341.3750915527344, |
|
"logps/rejected": -258.5977478027344, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07209452241659164, |
|
"rewards/margins": 0.06747150421142578, |
|
"rewards/rejected": -0.13956603407859802, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 7.9528219784641045, |
|
"learning_rate": 4.951460519416227e-07, |
|
"logits/chosen": -2.8398098945617676, |
|
"logits/rejected": -2.7632956504821777, |
|
"logps/chosen": -333.4034118652344, |
|
"logps/rejected": -277.18292236328125, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.1278577297925949, |
|
"rewards/margins": 0.06354766339063644, |
|
"rewards/rejected": -0.19140538573265076, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16221873364730507, |
|
"grad_norm": 7.866874656247983, |
|
"learning_rate": 4.942092042513458e-07, |
|
"logits/chosen": -2.875429153442383, |
|
"logits/rejected": -2.7301907539367676, |
|
"logps/chosen": -331.4903259277344, |
|
"logps/rejected": -320.02020263671875, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.020858686417341232, |
|
"rewards/margins": 0.12334004789590836, |
|
"rewards/rejected": -0.1441987305879593, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 8.868780049478978, |
|
"learning_rate": 4.931907926706373e-07, |
|
"logits/chosen": -2.8642358779907227, |
|
"logits/rejected": -2.639753580093384, |
|
"logps/chosen": -351.0049743652344, |
|
"logps/rejected": -247.955322265625, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10935908555984497, |
|
"rewards/margins": 0.09488765150308609, |
|
"rewards/rejected": -0.20424675941467285, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1726844583987441, |
|
"grad_norm": 8.927261659158807, |
|
"learning_rate": 4.920911573406924e-07, |
|
"logits/chosen": -2.683262825012207, |
|
"logits/rejected": -2.4876301288604736, |
|
"logps/chosen": -267.5043640136719, |
|
"logps/rejected": -220.41030883789062, |
|
"loss": 0.654, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.09172272682189941, |
|
"rewards/margins": 0.11745841801166534, |
|
"rewards/rejected": -0.20918114483356476, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17791732077446362, |
|
"grad_norm": 9.74659857793103, |
|
"learning_rate": 4.909106655307787e-07, |
|
"logits/chosen": -2.7913639545440674, |
|
"logits/rejected": -2.8080215454101562, |
|
"logps/chosen": -296.96527099609375, |
|
"logps/rejected": -329.0595703125, |
|
"loss": 0.6334, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.10264991223812103, |
|
"rewards/margins": 0.1215982437133789, |
|
"rewards/rejected": -0.22424814105033875, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18315018315018314, |
|
"grad_norm": 9.255082450028672, |
|
"learning_rate": 4.896497115155709e-07, |
|
"logits/chosen": -2.714353561401367, |
|
"logits/rejected": -2.7949321269989014, |
|
"logps/chosen": -203.4794464111328, |
|
"logps/rejected": -258.91802978515625, |
|
"loss": 0.6314, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11061130464076996, |
|
"rewards/margins": 0.22234594821929932, |
|
"rewards/rejected": -0.33295726776123047, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 8.548593647599837, |
|
"learning_rate": 4.883087164434672e-07, |
|
"logits/chosen": -2.7885565757751465, |
|
"logits/rejected": -2.708958625793457, |
|
"logps/chosen": -290.75732421875, |
|
"logps/rejected": -304.59832763671875, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.19256308674812317, |
|
"rewards/margins": 0.09193383157253265, |
|
"rewards/rejected": -0.284496933221817, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1936159079016222, |
|
"grad_norm": 8.88648816404561, |
|
"learning_rate": 4.868881281959282e-07, |
|
"logits/chosen": -2.718313694000244, |
|
"logits/rejected": -2.6233880519866943, |
|
"logps/chosen": -325.62701416015625, |
|
"logps/rejected": -320.6925354003906, |
|
"loss": 0.6319, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.2261027991771698, |
|
"rewards/margins": 0.271381139755249, |
|
"rewards/rejected": -0.49748390913009644, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1988487702773417, |
|
"grad_norm": 9.655605608773618, |
|
"learning_rate": 4.853884212378889e-07, |
|
"logits/chosen": -2.624234676361084, |
|
"logits/rejected": -2.7070670127868652, |
|
"logps/chosen": -227.9883270263672, |
|
"logps/rejected": -373.86834716796875, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.19947531819343567, |
|
"rewards/margins": 0.20574016869068146, |
|
"rewards/rejected": -0.4052155017852783, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 9.987383709387066, |
|
"learning_rate": 4.838100964592904e-07, |
|
"logits/chosen": -2.7791268825531006, |
|
"logits/rejected": -2.6209511756896973, |
|
"logps/chosen": -417.4608459472656, |
|
"logps/rejected": -311.443115234375, |
|
"loss": 0.6218, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.22052626311779022, |
|
"rewards/margins": 0.1696535348892212, |
|
"rewards/rejected": -0.3901798129081726, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 11.1193876638059, |
|
"learning_rate": 4.821536810077878e-07, |
|
"logits/chosen": -2.834602117538452, |
|
"logits/rejected": -2.670900583267212, |
|
"logps/chosen": -331.0826416015625, |
|
"logps/rejected": -311.0018310546875, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.29132142663002014, |
|
"rewards/margins": 0.3050875961780548, |
|
"rewards/rejected": -0.596409022808075, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21454735740450026, |
|
"grad_norm": 10.579644405019687, |
|
"learning_rate": 4.804197281126862e-07, |
|
"logits/chosen": -2.7097597122192383, |
|
"logits/rejected": -2.6869430541992188, |
|
"logps/chosen": -321.58221435546875, |
|
"logps/rejected": -371.677978515625, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3598526120185852, |
|
"rewards/margins": 0.17430773377418518, |
|
"rewards/rejected": -0.534160315990448, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 12.28815874807637, |
|
"learning_rate": 4.786088169001671e-07, |
|
"logits/chosen": -2.765571355819702, |
|
"logits/rejected": -2.734116554260254, |
|
"logps/chosen": -331.1507263183594, |
|
"logps/rejected": -355.69610595703125, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.449202299118042, |
|
"rewards/margins": 0.12143005430698395, |
|
"rewards/rejected": -0.5706323385238647, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2250130821559393, |
|
"grad_norm": 15.080564373802469, |
|
"learning_rate": 4.767215521998648e-07, |
|
"logits/chosen": -2.8776261806488037, |
|
"logits/rejected": -2.6981818675994873, |
|
"logps/chosen": -363.0828552246094, |
|
"logps/rejected": -341.48834228515625, |
|
"loss": 0.6015, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.419222891330719, |
|
"rewards/margins": 0.4860103130340576, |
|
"rewards/rejected": -0.9052332043647766, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 13.656410694192473, |
|
"learning_rate": 4.7475856434285853e-07, |
|
"logits/chosen": -2.725877285003662, |
|
"logits/rejected": -2.6641788482666016, |
|
"logps/chosen": -334.7582702636719, |
|
"logps/rejected": -327.239501953125, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.5639583468437195, |
|
"rewards/margins": 0.20849399268627167, |
|
"rewards/rejected": -0.7724524736404419, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23547880690737832, |
|
"grad_norm": 12.429252651802354, |
|
"learning_rate": 4.727205089511466e-07, |
|
"logits/chosen": -2.60087251663208, |
|
"logits/rejected": -2.6115145683288574, |
|
"logps/chosen": -300.7950134277344, |
|
"logps/rejected": -336.33642578125, |
|
"loss": 0.5608, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6206357479095459, |
|
"rewards/margins": 0.29499614238739014, |
|
"rewards/rejected": -0.915631890296936, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24071166928309787, |
|
"grad_norm": 12.380813117969742, |
|
"learning_rate": 4.706080667186738e-07, |
|
"logits/chosen": -2.733308792114258, |
|
"logits/rejected": -2.5995380878448486, |
|
"logps/chosen": -333.32366943359375, |
|
"logps/rejected": -338.6081848144531, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.369947612285614, |
|
"rewards/margins": 0.36883461475372314, |
|
"rewards/rejected": -0.7387822270393372, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24594453165881738, |
|
"grad_norm": 12.76423468861301, |
|
"learning_rate": 4.68421943183986e-07, |
|
"logits/chosen": -2.7085180282592773, |
|
"logits/rejected": -2.6696252822875977, |
|
"logps/chosen": -344.36517333984375, |
|
"logps/rejected": -381.83001708984375, |
|
"loss": 0.563, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5257383584976196, |
|
"rewards/margins": 0.3821314573287964, |
|
"rewards/rejected": -0.907869815826416, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 13.199681450344476, |
|
"learning_rate": 4.661628684945851e-07, |
|
"logits/chosen": -2.698058605194092, |
|
"logits/rejected": -2.7044830322265625, |
|
"logps/chosen": -283.84356689453125, |
|
"logps/rejected": -313.8056945800781, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.36220312118530273, |
|
"rewards/margins": 0.4206410348415375, |
|
"rewards/rejected": -0.7828441858291626, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 10.797426582932289, |
|
"learning_rate": 4.638315971630662e-07, |
|
"logits/chosen": -2.7037124633789062, |
|
"logits/rejected": -2.670248031616211, |
|
"logps/chosen": -349.2225341796875, |
|
"logps/rejected": -350.82635498046875, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4856594502925873, |
|
"rewards/margins": 0.40579062700271606, |
|
"rewards/rejected": -0.891450047492981, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2616431187859759, |
|
"grad_norm": 13.368448758443607, |
|
"learning_rate": 4.6142890781511635e-07, |
|
"logits/chosen": -2.660262107849121, |
|
"logits/rejected": -2.65765643119812, |
|
"logps/chosen": -311.58648681640625, |
|
"logps/rejected": -378.6731872558594, |
|
"loss": 0.5589, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4650937616825104, |
|
"rewards/margins": 0.5874634981155396, |
|
"rewards/rejected": -1.0525572299957275, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2668759811616955, |
|
"grad_norm": 12.2893519234881, |
|
"learning_rate": 4.5895560292945996e-07, |
|
"logits/chosen": -2.759093761444092, |
|
"logits/rejected": -2.6124329566955566, |
|
"logps/chosen": -350.0455627441406, |
|
"logps/rejected": -346.63287353515625, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5623534321784973, |
|
"rewards/margins": 0.3677280843257904, |
|
"rewards/rejected": -0.9300813674926758, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 10.78001482797236, |
|
"learning_rate": 4.5641250856983743e-07, |
|
"logits/chosen": -2.714203357696533, |
|
"logits/rejected": -2.694481372833252, |
|
"logps/chosen": -338.79876708984375, |
|
"logps/rejected": -373.81829833984375, |
|
"loss": 0.5881, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.6571844816207886, |
|
"rewards/margins": 0.18250516057014465, |
|
"rewards/rejected": -0.8396896123886108, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2773417059131345, |
|
"grad_norm": 12.813664857586332, |
|
"learning_rate": 4.5380047410910655e-07, |
|
"logits/chosen": -2.676630735397339, |
|
"logits/rejected": -2.646484851837158, |
|
"logps/chosen": -375.8238830566406, |
|
"logps/rejected": -336.6026611328125, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2573721706867218, |
|
"rewards/margins": 0.5897037386894226, |
|
"rewards/rejected": -0.8470758199691772, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 13.213645717719533, |
|
"learning_rate": 4.5112037194555876e-07, |
|
"logits/chosen": -2.6308674812316895, |
|
"logits/rejected": -2.578855514526367, |
|
"logps/chosen": -367.4393005371094, |
|
"logps/rejected": -396.51495361328125, |
|
"loss": 0.5448, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6825714707374573, |
|
"rewards/margins": 0.4664786756038666, |
|
"rewards/rejected": -1.149049997329712, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.28780743066457354, |
|
"grad_norm": 12.704805893037816, |
|
"learning_rate": 4.4837309721154536e-07, |
|
"logits/chosen": -2.7815651893615723, |
|
"logits/rejected": -2.636693239212036, |
|
"logps/chosen": -411.08270263671875, |
|
"logps/rejected": -408.10943603515625, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6877561807632446, |
|
"rewards/margins": 0.5920289158821106, |
|
"rewards/rejected": -1.2797850370407104, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 11.138560433108708, |
|
"learning_rate": 4.4555956747451065e-07, |
|
"logits/chosen": -2.7659242153167725, |
|
"logits/rejected": -2.6903443336486816, |
|
"logps/chosen": -337.35382080078125, |
|
"logps/rejected": -373.4418029785156, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.41948366165161133, |
|
"rewards/margins": 0.5816941857337952, |
|
"rewards/rejected": -1.0011777877807617, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29827315541601257, |
|
"grad_norm": 13.265582546440573, |
|
"learning_rate": 4.426807224305315e-07, |
|
"logits/chosen": -2.8295228481292725, |
|
"logits/rejected": -2.6806788444519043, |
|
"logps/chosen": -347.0091857910156, |
|
"logps/rejected": -354.14605712890625, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5706046223640442, |
|
"rewards/margins": 0.3682999908924103, |
|
"rewards/rejected": -0.9389045834541321, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3035060177917321, |
|
"grad_norm": 14.860167565465616, |
|
"learning_rate": 4.397375235904669e-07, |
|
"logits/chosen": -2.7773425579071045, |
|
"logits/rejected": -2.657461643218994, |
|
"logps/chosen": -388.76141357421875, |
|
"logps/rejected": -334.33197021484375, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6534782648086548, |
|
"rewards/margins": 0.4859207272529602, |
|
"rewards/rejected": -1.1393990516662598, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3087388801674516, |
|
"grad_norm": 13.215304460006989, |
|
"learning_rate": 4.3673095395882074e-07, |
|
"logits/chosen": -2.567587375640869, |
|
"logits/rejected": -2.5626230239868164, |
|
"logps/chosen": -285.3164978027344, |
|
"logps/rejected": -334.2855224609375, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6295741200447083, |
|
"rewards/margins": 0.47005924582481384, |
|
"rewards/rejected": -1.0996334552764893, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 14.62861289532331, |
|
"learning_rate": 4.3366201770542687e-07, |
|
"logits/chosen": -2.642322301864624, |
|
"logits/rejected": -2.6280393600463867, |
|
"logps/chosen": -355.798095703125, |
|
"logps/rejected": -380.4391174316406, |
|
"loss": 0.5962, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.6401641964912415, |
|
"rewards/margins": 0.39123058319091797, |
|
"rewards/rejected": -1.0313947200775146, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31920460491889063, |
|
"grad_norm": 12.639759878033539, |
|
"learning_rate": 4.3053173983006395e-07, |
|
"logits/chosen": -2.6867003440856934, |
|
"logits/rejected": -2.5773725509643555, |
|
"logps/chosen": -269.51800537109375, |
|
"logps/rejected": -340.40203857421875, |
|
"loss": 0.5495, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5835707187652588, |
|
"rewards/margins": 0.5573822855949402, |
|
"rewards/rejected": -1.1409530639648438, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32443746729461015, |
|
"grad_norm": 15.940327911602251, |
|
"learning_rate": 4.2734116582011403e-07, |
|
"logits/chosen": -2.793381690979004, |
|
"logits/rejected": -2.5359880924224854, |
|
"logps/chosen": -428.2900390625, |
|
"logps/rejected": -331.8592834472656, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5799857974052429, |
|
"rewards/margins": 0.6037784814834595, |
|
"rewards/rejected": -1.1837642192840576, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32967032967032966, |
|
"grad_norm": 13.004210887971325, |
|
"learning_rate": 4.2409136130137845e-07, |
|
"logits/chosen": -2.5990140438079834, |
|
"logits/rejected": -2.622506618499756, |
|
"logps/chosen": -304.9674072265625, |
|
"logps/rejected": -392.1850280761719, |
|
"loss": 0.5977, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7310804128646851, |
|
"rewards/margins": 0.46913275122642517, |
|
"rewards/rejected": -1.2002131938934326, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 13.184101561168577, |
|
"learning_rate": 4.207834116821672e-07, |
|
"logits/chosen": -2.702871799468994, |
|
"logits/rejected": -2.595551013946533, |
|
"logps/chosen": -366.40362548828125, |
|
"logps/rejected": -445.50054931640625, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6911154389381409, |
|
"rewards/margins": 0.7618328332901001, |
|
"rewards/rejected": -1.4529482126235962, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 13.183254894586826, |
|
"learning_rate": 4.174184217907818e-07, |
|
"logits/chosen": -2.6497156620025635, |
|
"logits/rejected": -2.5929837226867676, |
|
"logps/chosen": -361.13446044921875, |
|
"logps/rejected": -376.19097900390625, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8520764112472534, |
|
"rewards/margins": 0.43589457869529724, |
|
"rewards/rejected": -1.2879711389541626, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 13.891829778311674, |
|
"learning_rate": 4.1399751550651084e-07, |
|
"logits/chosen": -2.6143274307250977, |
|
"logits/rejected": -2.544797420501709, |
|
"logps/chosen": -328.31103515625, |
|
"logps/rejected": -359.98651123046875, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8473265767097473, |
|
"rewards/margins": 0.4335683286190033, |
|
"rewards/rejected": -1.2808948755264282, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35060177917320773, |
|
"grad_norm": 18.495127929360645, |
|
"learning_rate": 4.1052183538426426e-07, |
|
"logits/chosen": -2.5370559692382812, |
|
"logits/rejected": -2.501626491546631, |
|
"logps/chosen": -342.26214599609375, |
|
"logps/rejected": -351.6610412597656, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7248779535293579, |
|
"rewards/margins": 0.41734808683395386, |
|
"rewards/rejected": -1.1422260999679565, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 14.322596668348043, |
|
"learning_rate": 4.0699254227296884e-07, |
|
"logits/chosen": -2.47985577583313, |
|
"logits/rejected": -2.422318935394287, |
|
"logps/chosen": -352.4524841308594, |
|
"logps/rejected": -362.7362365722656, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.042677402496338, |
|
"rewards/margins": 0.34999677538871765, |
|
"rewards/rejected": -1.392674207687378, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36106750392464676, |
|
"grad_norm": 12.848274791112614, |
|
"learning_rate": 4.034108149278543e-07, |
|
"logits/chosen": -2.6636533737182617, |
|
"logits/rejected": -2.5435545444488525, |
|
"logps/chosen": -335.9781799316406, |
|
"logps/rejected": -368.18707275390625, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8715833425521851, |
|
"rewards/margins": 0.4915018081665039, |
|
"rewards/rejected": -1.363085150718689, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3663003663003663, |
|
"grad_norm": 13.405499026929567, |
|
"learning_rate": 3.9977784961675833e-07, |
|
"logits/chosen": -2.5500426292419434, |
|
"logits/rejected": -2.5117454528808594, |
|
"logps/chosen": -354.26446533203125, |
|
"logps/rejected": -370.3767395019531, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9094370603561401, |
|
"rewards/margins": 0.3823543190956116, |
|
"rewards/rejected": -1.2917914390563965, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3715332286760858, |
|
"grad_norm": 15.459626489708501, |
|
"learning_rate": 3.96094859720583e-07, |
|
"logits/chosen": -2.6391563415527344, |
|
"logits/rejected": -2.5070877075195312, |
|
"logps/chosen": -416.921875, |
|
"logps/rejected": -413.1897888183594, |
|
"loss": 0.5178, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.776429295539856, |
|
"rewards/margins": 0.5824471712112427, |
|
"rewards/rejected": -1.3588765859603882, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 14.411719668942082, |
|
"learning_rate": 3.923630753280357e-07, |
|
"logits/chosen": -2.3970694541931152, |
|
"logits/rejected": -2.45841646194458, |
|
"logps/chosen": -297.50933837890625, |
|
"logps/rejected": -357.93072509765625, |
|
"loss": 0.5194, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7691371440887451, |
|
"rewards/margins": 0.49477410316467285, |
|
"rewards/rejected": -1.2639113664627075, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3819989534275249, |
|
"grad_norm": 14.796722738483695, |
|
"learning_rate": 3.8858374282478893e-07, |
|
"logits/chosen": -2.5316245555877686, |
|
"logits/rejected": -2.5274815559387207, |
|
"logps/chosen": -352.5939025878906, |
|
"logps/rejected": -455.6580505371094, |
|
"loss": 0.5761, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8298758268356323, |
|
"rewards/margins": 0.744354784488678, |
|
"rewards/rejected": -1.5742305517196655, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3872318158032444, |
|
"grad_norm": 14.670377485971203, |
|
"learning_rate": 3.8475812447719823e-07, |
|
"logits/chosen": -2.4688239097595215, |
|
"logits/rejected": -2.488438844680786, |
|
"logps/chosen": -321.0882873535156, |
|
"logps/rejected": -346.17303466796875, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7527046203613281, |
|
"rewards/margins": 0.4411894381046295, |
|
"rewards/rejected": -1.1938940286636353, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3924646781789639, |
|
"grad_norm": 16.967977934939814, |
|
"learning_rate": 3.8088749801071496e-07, |
|
"logits/chosen": -2.4572207927703857, |
|
"logits/rejected": -2.408034086227417, |
|
"logps/chosen": -369.81195068359375, |
|
"logps/rejected": -413.0748596191406, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8101743459701538, |
|
"rewards/margins": 0.29960817098617554, |
|
"rewards/rejected": -1.1097824573516846, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 15.020786229806706, |
|
"learning_rate": 3.7697315618313644e-07, |
|
"logits/chosen": -2.480273962020874, |
|
"logits/rejected": -2.4273810386657715, |
|
"logps/chosen": -292.92742919921875, |
|
"logps/rejected": -306.0049133300781, |
|
"loss": 0.5705, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.5610066652297974, |
|
"rewards/margins": 0.5253943204879761, |
|
"rewards/rejected": -1.086401104927063, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40293040293040294, |
|
"grad_norm": 13.249381739832709, |
|
"learning_rate": 3.7301640635283584e-07, |
|
"logits/chosen": -2.4788036346435547, |
|
"logits/rejected": -2.444715738296509, |
|
"logps/chosen": -328.667724609375, |
|
"logps/rejected": -405.86468505859375, |
|
"loss": 0.5882, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8170742988586426, |
|
"rewards/margins": 0.4358190596103668, |
|
"rewards/rejected": -1.2528934478759766, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 15.02549730848373, |
|
"learning_rate": 3.6901857004211443e-07, |
|
"logits/chosen": -2.461081027984619, |
|
"logits/rejected": -2.4211714267730713, |
|
"logps/chosen": -331.6138610839844, |
|
"logps/rejected": -356.93695068359375, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8925091028213501, |
|
"rewards/margins": 0.6261960864067078, |
|
"rewards/rejected": -1.5187050104141235, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.413396127681842, |
|
"grad_norm": 14.307804717064869, |
|
"learning_rate": 3.6498098249582444e-07, |
|
"logits/chosen": -2.464012622833252, |
|
"logits/rejected": -2.4822998046875, |
|
"logps/chosen": -316.3138732910156, |
|
"logps/rejected": -410.690185546875, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8896521329879761, |
|
"rewards/margins": 0.3338387608528137, |
|
"rewards/rejected": -1.2234909534454346, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 14.122387290168371, |
|
"learning_rate": 3.6090499223540757e-07, |
|
"logits/chosen": -2.503190279006958, |
|
"logits/rejected": -2.5002918243408203, |
|
"logps/chosen": -398.0213928222656, |
|
"logps/rejected": -429.8836364746094, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.951545238494873, |
|
"rewards/margins": 0.32522302865982056, |
|
"rewards/rejected": -1.2767683267593384, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.423861852433281, |
|
"grad_norm": 16.861353731056123, |
|
"learning_rate": 3.5679196060850034e-07, |
|
"logits/chosen": -2.4697022438049316, |
|
"logits/rejected": -2.3587775230407715, |
|
"logps/chosen": -409.6409606933594, |
|
"logps/rejected": -421.2870178222656, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0030632019042969, |
|
"rewards/margins": 0.5317606925964355, |
|
"rewards/rejected": -1.534824013710022, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4290947148090005, |
|
"grad_norm": 11.991585417505805, |
|
"learning_rate": 3.5264326133425464e-07, |
|
"logits/chosen": -2.505713701248169, |
|
"logits/rejected": -2.4144287109375, |
|
"logps/chosen": -390.4349365234375, |
|
"logps/rejected": -392.1512145996094, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0158454179763794, |
|
"rewards/margins": 0.547788143157959, |
|
"rewards/rejected": -1.5636335611343384, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43432757718472004, |
|
"grad_norm": 13.350862318790352, |
|
"learning_rate": 3.4846028004452693e-07, |
|
"logits/chosen": -2.6179637908935547, |
|
"logits/rejected": -2.5537848472595215, |
|
"logps/chosen": -302.6700744628906, |
|
"logps/rejected": -345.3885498046875, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6322959661483765, |
|
"rewards/margins": 0.4499518871307373, |
|
"rewards/rejected": -1.0822478532791138, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 12.571206419485762, |
|
"learning_rate": 3.4424441382108826e-07, |
|
"logits/chosen": -2.684983253479004, |
|
"logits/rejected": -2.4114038944244385, |
|
"logps/chosen": -480.42626953125, |
|
"logps/rejected": -419.2476501464844, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7017939686775208, |
|
"rewards/margins": 0.5825623869895935, |
|
"rewards/rejected": -1.2843563556671143, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44479330193615907, |
|
"grad_norm": 18.086159855279544, |
|
"learning_rate": 3.399970707290105e-07, |
|
"logits/chosen": -2.5462467670440674, |
|
"logits/rejected": -2.385918378829956, |
|
"logps/chosen": -349.1484375, |
|
"logps/rejected": -348.0808410644531, |
|
"loss": 0.5834, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9068197011947632, |
|
"rewards/margins": 0.38493895530700684, |
|
"rewards/rejected": -1.2917585372924805, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4500261643118786, |
|
"grad_norm": 15.337587997760744, |
|
"learning_rate": 3.3571966934638376e-07, |
|
"logits/chosen": -2.551776885986328, |
|
"logits/rejected": -2.5410056114196777, |
|
"logps/chosen": -265.8577880859375, |
|
"logps/rejected": -397.5907287597656, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7245792746543884, |
|
"rewards/margins": 0.657728374004364, |
|
"rewards/rejected": -1.382307767868042, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4552590266875981, |
|
"grad_norm": 12.663155725160859, |
|
"learning_rate": 3.314136382905234e-07, |
|
"logits/chosen": -2.247466564178467, |
|
"logits/rejected": -2.3040506839752197, |
|
"logps/chosen": -293.7249755859375, |
|
"logps/rejected": -356.9481506347656, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6831008791923523, |
|
"rewards/margins": 0.5791589021682739, |
|
"rewards/rejected": -1.2622597217559814, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 16.557688098904418, |
|
"learning_rate": 3.270804157408225e-07, |
|
"logits/chosen": -2.5478596687316895, |
|
"logits/rejected": -2.507978916168213, |
|
"logps/chosen": -365.33721923828125, |
|
"logps/rejected": -375.700927734375, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8751762509346008, |
|
"rewards/margins": 0.3427140712738037, |
|
"rewards/rejected": -1.2178903818130493, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46572475143903713, |
|
"grad_norm": 17.18916868130309, |
|
"learning_rate": 3.227214489584128e-07, |
|
"logits/chosen": -2.6057088375091553, |
|
"logits/rejected": -2.520437240600586, |
|
"logps/chosen": -402.12530517578125, |
|
"logps/rejected": -382.2662658691406, |
|
"loss": 0.5524, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8464688062667847, |
|
"rewards/margins": 0.5937511324882507, |
|
"rewards/rejected": -1.4402198791503906, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 18.492056543283415, |
|
"learning_rate": 3.1833819380279023e-07, |
|
"logits/chosen": -2.420086622238159, |
|
"logits/rejected": -2.3860373497009277, |
|
"logps/chosen": -335.7374267578125, |
|
"logps/rejected": -481.30999755859375, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8576730489730835, |
|
"rewards/margins": 1.0558011531829834, |
|
"rewards/rejected": -1.9134740829467773, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 17.46995150497419, |
|
"learning_rate": 3.139321142455703e-07, |
|
"logits/chosen": -2.390942096710205, |
|
"logits/rejected": -2.31058669090271, |
|
"logps/chosen": -272.40582275390625, |
|
"logps/rejected": -354.06787109375, |
|
"loss": 0.5842, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9074978828430176, |
|
"rewards/margins": 0.7846023440361023, |
|
"rewards/rejected": -1.692100167274475, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 13.125320894153095, |
|
"learning_rate": 3.095046818815331e-07, |
|
"logits/chosen": -2.558971881866455, |
|
"logits/rejected": -2.407036781311035, |
|
"logps/chosen": -407.01318359375, |
|
"logps/rejected": -400.66986083984375, |
|
"loss": 0.5678, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9686500430107117, |
|
"rewards/margins": 0.5105721354484558, |
|
"rewards/rejected": -1.4792221784591675, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48665620094191525, |
|
"grad_norm": 12.468422846545812, |
|
"learning_rate": 3.0505737543712275e-07, |
|
"logits/chosen": -2.4688422679901123, |
|
"logits/rejected": -2.445286273956299, |
|
"logps/chosen": -350.22552490234375, |
|
"logps/rejected": -377.6510925292969, |
|
"loss": 0.5411, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8176230192184448, |
|
"rewards/margins": 0.3221116364002228, |
|
"rewards/rejected": -1.1397348642349243, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49188906331763477, |
|
"grad_norm": 12.701802066404364, |
|
"learning_rate": 3.0059168027656475e-07, |
|
"logits/chosen": -2.5918569564819336, |
|
"logits/rejected": -2.4825873374938965, |
|
"logps/chosen": -375.5536804199219, |
|
"logps/rejected": -388.7239685058594, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.749475359916687, |
|
"rewards/margins": 0.7011125683784485, |
|
"rewards/rejected": -1.4505879878997803, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4971219256933543, |
|
"grad_norm": 14.452393120408981, |
|
"learning_rate": 2.9610908790576663e-07, |
|
"logits/chosen": -2.470831871032715, |
|
"logits/rejected": -2.329481601715088, |
|
"logps/chosen": -377.6229553222656, |
|
"logps/rejected": -450.210205078125, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8591662645339966, |
|
"rewards/margins": 0.9110971689224243, |
|
"rewards/rejected": -1.770263433456421, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 12.34698868311349, |
|
"learning_rate": 2.9161109547416667e-07, |
|
"logits/chosen": -2.5465993881225586, |
|
"logits/rejected": -2.4387006759643555, |
|
"logps/chosen": -407.3908996582031, |
|
"logps/rejected": -446.47271728515625, |
|
"loss": 0.5492, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1968122720718384, |
|
"rewards/margins": 0.5576936602592468, |
|
"rewards/rejected": -1.7545058727264404, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5075876504447933, |
|
"grad_norm": 16.584543308090126, |
|
"learning_rate": 2.8709920527469834e-07, |
|
"logits/chosen": -2.4225964546203613, |
|
"logits/rejected": -2.345376968383789, |
|
"logps/chosen": -380.8326416015625, |
|
"logps/rejected": -428.33917236328125, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.8356760144233704, |
|
"rewards/margins": 0.9355084300041199, |
|
"rewards/rejected": -1.7711843252182007, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 10.765690870930914, |
|
"learning_rate": 2.8257492424203685e-07, |
|
"logits/chosen": -2.5656943321228027, |
|
"logits/rejected": -2.3547496795654297, |
|
"logps/chosen": -374.1805725097656, |
|
"logps/rejected": -384.082275390625, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6013060212135315, |
|
"rewards/margins": 0.7941185235977173, |
|
"rewards/rejected": -1.3954246044158936, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5180533751962323, |
|
"grad_norm": 13.743981510014283, |
|
"learning_rate": 2.780397634492949e-07, |
|
"logits/chosen": -2.366999864578247, |
|
"logits/rejected": -2.2590906620025635, |
|
"logps/chosen": -318.1770935058594, |
|
"logps/rejected": -373.51483154296875, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6711180210113525, |
|
"rewards/margins": 0.7915961742401123, |
|
"rewards/rejected": -1.4627141952514648, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"grad_norm": 17.22202415903823, |
|
"learning_rate": 2.7349523760333674e-07, |
|
"logits/chosen": -2.3708062171936035, |
|
"logits/rejected": -2.294684410095215, |
|
"logps/chosen": -351.9925537109375, |
|
"logps/rejected": -387.6063537597656, |
|
"loss": 0.5623, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1155288219451904, |
|
"rewards/margins": 0.4887705445289612, |
|
"rewards/rejected": -1.6042993068695068, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5285190999476713, |
|
"grad_norm": 15.575303273110485, |
|
"learning_rate": 2.6894286453887827e-07, |
|
"logits/chosen": -2.3775484561920166, |
|
"logits/rejected": -2.351830244064331, |
|
"logps/chosen": -332.186767578125, |
|
"logps/rejected": -421.51495361328125, |
|
"loss": 0.5409, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9195478558540344, |
|
"rewards/margins": 0.7130652070045471, |
|
"rewards/rejected": -1.632612943649292, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 13.940995090291242, |
|
"learning_rate": 2.6438416471154273e-07, |
|
"logits/chosen": -2.4398512840270996, |
|
"logits/rejected": -2.3804996013641357, |
|
"logps/chosen": -374.5543518066406, |
|
"logps/rejected": -403.6950988769531, |
|
"loss": 0.5025, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7311006784439087, |
|
"rewards/margins": 0.7627996802330017, |
|
"rewards/rejected": -1.493900179862976, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5389848246991105, |
|
"grad_norm": 13.914954697557572, |
|
"learning_rate": 2.598206606900406e-07, |
|
"logits/chosen": -2.4829204082489014, |
|
"logits/rejected": -2.427128553390503, |
|
"logps/chosen": -361.61224365234375, |
|
"logps/rejected": -353.1183166503906, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9841195344924927, |
|
"rewards/margins": 0.31136849522590637, |
|
"rewards/rejected": -1.295487880706787, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 12.574429035047727, |
|
"learning_rate": 2.552538766476443e-07, |
|
"logits/chosen": -2.5080599784851074, |
|
"logits/rejected": -2.5598042011260986, |
|
"logps/chosen": -339.239013671875, |
|
"logps/rejected": -406.1683349609375, |
|
"loss": 0.5688, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8408640027046204, |
|
"rewards/margins": 0.5512793660163879, |
|
"rewards/rejected": -1.3921434879302979, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5494505494505495, |
|
"grad_norm": 15.63419168523058, |
|
"learning_rate": 2.5068533785312666e-07, |
|
"logits/chosen": -2.6056623458862305, |
|
"logits/rejected": -2.479773998260498, |
|
"logps/chosen": -401.62493896484375, |
|
"logps/rejected": -437.4549865722656, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6328617334365845, |
|
"rewards/margins": 0.7796282172203064, |
|
"rewards/rejected": -1.4124900102615356, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.554683411826269, |
|
"grad_norm": 14.961916192694616, |
|
"learning_rate": 2.461165701613333e-07, |
|
"logits/chosen": -2.4504477977752686, |
|
"logits/rejected": -2.4499430656433105, |
|
"logps/chosen": -327.9822692871094, |
|
"logps/rejected": -425.7730407714844, |
|
"loss": 0.5507, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5652133226394653, |
|
"rewards/margins": 0.9709668159484863, |
|
"rewards/rejected": -1.5361802577972412, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5599162742019885, |
|
"grad_norm": 11.949695298976996, |
|
"learning_rate": 2.415490995035596e-07, |
|
"logits/chosen": -2.466282844543457, |
|
"logits/rejected": -2.4521498680114746, |
|
"logps/chosen": -413.0519104003906, |
|
"logps/rejected": -404.78887939453125, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.769394040107727, |
|
"rewards/margins": 0.5370974540710449, |
|
"rewards/rejected": -1.306491494178772, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 14.560071559898743, |
|
"learning_rate": 2.3698445137790258e-07, |
|
"logits/chosen": -2.568333387374878, |
|
"logits/rejected": -2.4807047843933105, |
|
"logps/chosen": -317.51922607421875, |
|
"logps/rejected": -374.6702575683594, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7833175659179688, |
|
"rewards/margins": 0.6573908925056458, |
|
"rewards/rejected": -1.4407083988189697, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5703819989534276, |
|
"grad_norm": 18.172239519036054, |
|
"learning_rate": 2.3242415033975575e-07, |
|
"logits/chosen": -2.492182493209839, |
|
"logits/rejected": -2.335998058319092, |
|
"logps/chosen": -410.70745849609375, |
|
"logps/rejected": -327.73468017578125, |
|
"loss": 0.5553, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9884270429611206, |
|
"rewards/margins": 0.37207865715026855, |
|
"rewards/rejected": -1.3605058193206787, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5756148613291471, |
|
"grad_norm": 14.327062283420295, |
|
"learning_rate": 2.2786971949262134e-07, |
|
"logits/chosen": -2.449826955795288, |
|
"logits/rejected": -2.399465560913086, |
|
"logps/chosen": -350.7200927734375, |
|
"logps/rejected": -423.613525390625, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8182450532913208, |
|
"rewards/margins": 0.6440758109092712, |
|
"rewards/rejected": -1.4623210430145264, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5808477237048666, |
|
"grad_norm": 15.370253469697573, |
|
"learning_rate": 2.2332267997940513e-07, |
|
"logits/chosen": -2.2341718673706055, |
|
"logits/rejected": -2.215893268585205, |
|
"logps/chosen": -250.4757843017578, |
|
"logps/rejected": -349.6363830566406, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7104169130325317, |
|
"rewards/margins": 0.7649521231651306, |
|
"rewards/rejected": -1.4753690958023071, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 16.74505481346304, |
|
"learning_rate": 2.1878455047436753e-07, |
|
"logits/chosen": -2.443295955657959, |
|
"logits/rejected": -2.3826141357421875, |
|
"logps/chosen": -364.59844970703125, |
|
"logps/rejected": -402.9962463378906, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8120282292366028, |
|
"rewards/margins": 0.6617709994316101, |
|
"rewards/rejected": -1.473799228668213, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5913134484563056, |
|
"grad_norm": 15.275524191080605, |
|
"learning_rate": 2.1425684667589852e-07, |
|
"logits/chosen": -2.2886431217193604, |
|
"logits/rejected": -2.260434627532959, |
|
"logps/chosen": -339.64190673828125, |
|
"logps/rejected": -433.12384033203125, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.151893973350525, |
|
"rewards/margins": 0.5123482942581177, |
|
"rewards/rejected": -1.6642423868179321, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 17.725535505434664, |
|
"learning_rate": 2.0974108080028692e-07, |
|
"logits/chosen": -2.5943448543548584, |
|
"logits/rejected": -2.5342066287994385, |
|
"logps/chosen": -349.8543395996094, |
|
"logps/rejected": -431.2449645996094, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8557387590408325, |
|
"rewards/margins": 0.7109647989273071, |
|
"rewards/rejected": -1.5667035579681396, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6017791732077447, |
|
"grad_norm": 17.830532239589452, |
|
"learning_rate": 2.0523876107665194e-07, |
|
"logits/chosen": -2.5601460933685303, |
|
"logits/rejected": -2.3580188751220703, |
|
"logps/chosen": -357.18505859375, |
|
"logps/rejected": -392.36785888671875, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8235940933227539, |
|
"rewards/margins": 0.7207462787628174, |
|
"rewards/rejected": -1.5443403720855713, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6070120355834642, |
|
"grad_norm": 15.086156722317808, |
|
"learning_rate": 2.0075139124320787e-07, |
|
"logits/chosen": -2.306135654449463, |
|
"logits/rejected": -2.318141222000122, |
|
"logps/chosen": -319.282470703125, |
|
"logps/rejected": -322.34759521484375, |
|
"loss": 0.5525, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9360674619674683, |
|
"rewards/margins": 0.3861456513404846, |
|
"rewards/rejected": -1.322213053703308, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 15.825940084503408, |
|
"learning_rate": 1.962804700450265e-07, |
|
"logits/chosen": -2.4736104011535645, |
|
"logits/rejected": -2.3889100551605225, |
|
"logps/chosen": -358.4153747558594, |
|
"logps/rejected": -449.76202392578125, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7582507133483887, |
|
"rewards/margins": 0.9168073534965515, |
|
"rewards/rejected": -1.675058126449585, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6174777603349032, |
|
"grad_norm": 12.701668602585332, |
|
"learning_rate": 1.9182749073346943e-07, |
|
"logits/chosen": -2.5458261966705322, |
|
"logits/rejected": -2.4800150394439697, |
|
"logps/chosen": -410.97125244140625, |
|
"logps/rejected": -402.61090087890625, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8969039916992188, |
|
"rewards/margins": 0.3998274803161621, |
|
"rewards/rejected": -1.2967313528060913, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6227106227106227, |
|
"grad_norm": 15.213333483929908, |
|
"learning_rate": 1.8739394056745372e-07, |
|
"logits/chosen": -2.5773282051086426, |
|
"logits/rejected": -2.4755005836486816, |
|
"logps/chosen": -432.55023193359375, |
|
"logps/rejected": -403.6323547363281, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6178534626960754, |
|
"rewards/margins": 0.6086469292640686, |
|
"rewards/rejected": -1.2265002727508545, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 13.28337289233937, |
|
"learning_rate": 1.8298130031671972e-07, |
|
"logits/chosen": -2.248260736465454, |
|
"logits/rejected": -2.14320969581604, |
|
"logps/chosen": -411.7080078125, |
|
"logps/rejected": -417.7911682128906, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0241950750350952, |
|
"rewards/margins": 0.44773560762405396, |
|
"rewards/rejected": -1.471930742263794, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6331763474620618, |
|
"grad_norm": 14.644715830992142, |
|
"learning_rate": 1.785910437672658e-07, |
|
"logits/chosen": -2.575362205505371, |
|
"logits/rejected": -2.505906105041504, |
|
"logps/chosen": -399.6676940917969, |
|
"logps/rejected": -413.50213623046875, |
|
"loss": 0.5518, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9969428181648254, |
|
"rewards/margins": 0.4946107864379883, |
|
"rewards/rejected": -1.4915534257888794, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6384092098377813, |
|
"grad_norm": 14.993626975757754, |
|
"learning_rate": 1.7422463722911624e-07, |
|
"logits/chosen": -2.5508971214294434, |
|
"logits/rejected": -2.498213052749634, |
|
"logps/chosen": -406.09857177734375, |
|
"logps/rejected": -459.4518127441406, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7804958820343018, |
|
"rewards/margins": 0.982401967048645, |
|
"rewards/rejected": -1.7628978490829468, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6436420722135008, |
|
"grad_norm": 14.555399613646646, |
|
"learning_rate": 1.6988353904658492e-07, |
|
"logits/chosen": -2.5013349056243896, |
|
"logits/rejected": -2.3648574352264404, |
|
"logps/chosen": -423.92156982421875, |
|
"logps/rejected": -407.5552673339844, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7597065567970276, |
|
"rewards/margins": 0.9367905855178833, |
|
"rewards/rejected": -1.6964972019195557, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6488749345892203, |
|
"grad_norm": 17.10213719867531, |
|
"learning_rate": 1.6556919911120081e-07, |
|
"logits/chosen": -2.4204959869384766, |
|
"logits/rejected": -2.392052173614502, |
|
"logps/chosen": -315.9274597167969, |
|
"logps/rejected": -358.2281188964844, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7050585746765137, |
|
"rewards/margins": 0.7052944302558899, |
|
"rewards/rejected": -1.4103529453277588, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6541077969649398, |
|
"grad_norm": 15.582148230915594, |
|
"learning_rate": 1.6128305837745546e-07, |
|
"logits/chosen": -2.5636868476867676, |
|
"logits/rejected": -2.451737403869629, |
|
"logps/chosen": -364.83648681640625, |
|
"logps/rejected": -451.89154052734375, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8910311460494995, |
|
"rewards/margins": 0.7389537692070007, |
|
"rewards/rejected": -1.6299848556518555, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 12.389506252977379, |
|
"learning_rate": 1.570265483815364e-07, |
|
"logits/chosen": -2.4629874229431152, |
|
"logits/rejected": -2.4053022861480713, |
|
"logps/chosen": -359.14007568359375, |
|
"logps/rejected": -325.073974609375, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8759597539901733, |
|
"rewards/margins": 0.5990678071975708, |
|
"rewards/rejected": -1.4750275611877441, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6645735217163788, |
|
"grad_norm": 15.13919490700428, |
|
"learning_rate": 1.5280109076320506e-07, |
|
"logits/chosen": -2.4408326148986816, |
|
"logits/rejected": -2.3683300018310547, |
|
"logps/chosen": -314.1429138183594, |
|
"logps/rejected": -360.7604675292969, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7542547583580017, |
|
"rewards/margins": 0.7044476270675659, |
|
"rewards/rejected": -1.4587024450302124, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6698063840920984, |
|
"grad_norm": 19.126427468990116, |
|
"learning_rate": 1.4860809679098158e-07, |
|
"logits/chosen": -2.4560303688049316, |
|
"logits/rejected": -2.3065128326416016, |
|
"logps/chosen": -338.4498291015625, |
|
"logps/rejected": -376.27752685546875, |
|
"loss": 0.5216, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.886088490486145, |
|
"rewards/margins": 0.7132107019424438, |
|
"rewards/rejected": -1.599299430847168, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6750392464678179, |
|
"grad_norm": 14.26442684901879, |
|
"learning_rate": 1.444489668907914e-07, |
|
"logits/chosen": -2.3746871948242188, |
|
"logits/rejected": -2.3699193000793457, |
|
"logps/chosen": -318.0958557128906, |
|
"logps/rejected": -436.58404541015625, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.681584358215332, |
|
"rewards/margins": 0.8649810552597046, |
|
"rewards/rejected": -1.5465654134750366, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 14.706784127879546, |
|
"learning_rate": 1.403250901782354e-07, |
|
"logits/chosen": -2.3808302879333496, |
|
"logits/rejected": -2.412135124206543, |
|
"logps/chosen": -363.4635925292969, |
|
"logps/rejected": -432.95135498046875, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9431090354919434, |
|
"rewards/margins": 0.5307827591896057, |
|
"rewards/rejected": -1.4738918542861938, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6855049712192569, |
|
"grad_norm": 17.720159916923304, |
|
"learning_rate": 1.3623784399463584e-07, |
|
"logits/chosen": -2.577618360519409, |
|
"logits/rejected": -2.5261757373809814, |
|
"logps/chosen": -319.8127136230469, |
|
"logps/rejected": -351.7994079589844, |
|
"loss": 0.5104, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6931655406951904, |
|
"rewards/margins": 0.7248280048370361, |
|
"rewards/rejected": -1.4179935455322266, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 15.192334650934669, |
|
"learning_rate": 1.3218859344701632e-07, |
|
"logits/chosen": -2.4843485355377197, |
|
"logits/rejected": -2.457723617553711, |
|
"logps/chosen": -385.90252685546875, |
|
"logps/rejected": -457.72589111328125, |
|
"loss": 0.4868, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9207484126091003, |
|
"rewards/margins": 0.6614077687263489, |
|
"rewards/rejected": -1.5821563005447388, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6959706959706959, |
|
"grad_norm": 15.242352423044528, |
|
"learning_rate": 1.2817869095216624e-07, |
|
"logits/chosen": -2.459498882293701, |
|
"logits/rejected": -2.4493675231933594, |
|
"logps/chosen": -344.1761169433594, |
|
"logps/rejected": -450.4659118652344, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7132623791694641, |
|
"rewards/margins": 0.7677630186080933, |
|
"rewards/rejected": -1.4810254573822021, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7012035583464155, |
|
"grad_norm": 15.246351096607901, |
|
"learning_rate": 1.2420947578494522e-07, |
|
"logits/chosen": -2.3682773113250732, |
|
"logits/rejected": -2.2514185905456543, |
|
"logps/chosen": -355.0648193359375, |
|
"logps/rejected": -383.1059875488281, |
|
"loss": 0.5149, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8611183166503906, |
|
"rewards/margins": 0.8342107534408569, |
|
"rewards/rejected": -1.6953290700912476, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.706436420722135, |
|
"grad_norm": 18.47866581271413, |
|
"learning_rate": 1.202822736309758e-07, |
|
"logits/chosen": -2.440950870513916, |
|
"logits/rejected": -2.402467727661133, |
|
"logps/chosen": -381.53582763671875, |
|
"logps/rejected": -396.8423156738281, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8849695324897766, |
|
"rewards/margins": 0.5131067037582397, |
|
"rewards/rejected": -1.3980762958526611, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7116692830978545, |
|
"grad_norm": 15.195162318042737, |
|
"learning_rate": 1.1639839614387572e-07, |
|
"logits/chosen": -2.3311028480529785, |
|
"logits/rejected": -2.294950008392334, |
|
"logps/chosen": -437.0987243652344, |
|
"logps/rejected": -458.8521423339844, |
|
"loss": 0.5722, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9422661066055298, |
|
"rewards/margins": 0.5922858119010925, |
|
"rewards/rejected": -1.5345518589019775, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.716902145473574, |
|
"grad_norm": 15.502352930702962, |
|
"learning_rate": 1.1255914050717552e-07, |
|
"logits/chosen": -2.4980738162994385, |
|
"logits/rejected": -2.3025894165039062, |
|
"logps/chosen": -411.71624755859375, |
|
"logps/rejected": -367.0052185058594, |
|
"loss": 0.4963, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9251989126205444, |
|
"rewards/margins": 0.5994529724121094, |
|
"rewards/rejected": -1.524652123451233, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 15.922503449406639, |
|
"learning_rate": 1.0876578900107053e-07, |
|
"logits/chosen": -2.457176685333252, |
|
"logits/rejected": -2.388563632965088, |
|
"logps/chosen": -283.1033630371094, |
|
"logps/rejected": -396.8433837890625, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.6271666884422302, |
|
"rewards/margins": 0.8667025566101074, |
|
"rewards/rejected": -1.4938693046569824, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.727367870225013, |
|
"grad_norm": 14.942498077328574, |
|
"learning_rate": 1.050196085741491e-07, |
|
"logits/chosen": -2.37958025932312, |
|
"logits/rejected": -2.257580280303955, |
|
"logps/chosen": -346.8523254394531, |
|
"logps/rejected": -394.2856140136719, |
|
"loss": 0.5031, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8508012890815735, |
|
"rewards/margins": 0.9125627279281616, |
|
"rewards/rejected": -1.7633640766143799, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 17.17362263234656, |
|
"learning_rate": 1.0132185042024246e-07, |
|
"logits/chosen": -2.3648860454559326, |
|
"logits/rejected": -2.372678279876709, |
|
"logps/chosen": -351.4415588378906, |
|
"logps/rejected": -423.01922607421875, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1479917764663696, |
|
"rewards/margins": 0.5283652544021606, |
|
"rewards/rejected": -1.6763570308685303, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7378335949764521, |
|
"grad_norm": 15.813092628150583, |
|
"learning_rate": 9.767374956053584e-08, |
|
"logits/chosen": -2.366389036178589, |
|
"logits/rejected": -2.312243938446045, |
|
"logps/chosen": -347.84228515625, |
|
"logps/rejected": -415.84893798828125, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9349943399429321, |
|
"rewards/margins": 0.792957603931427, |
|
"rewards/rejected": -1.7279517650604248, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7430664573521716, |
|
"grad_norm": 17.88370029448844, |
|
"learning_rate": 9.407652443108192e-08, |
|
"logits/chosen": -2.4744935035705566, |
|
"logits/rejected": -2.3651137351989746, |
|
"logps/chosen": -408.2701110839844, |
|
"logps/rejected": -409.85400390625, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.9281660318374634, |
|
"rewards/margins": 0.6298896074295044, |
|
"rewards/rejected": -1.5580556392669678, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7482993197278912, |
|
"grad_norm": 12.281068889997014, |
|
"learning_rate": 9.053137647585229e-08, |
|
"logits/chosen": -2.4698004722595215, |
|
"logits/rejected": -2.3483598232269287, |
|
"logps/chosen": -389.1154479980469, |
|
"logps/rejected": -421.6959533691406, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8953316807746887, |
|
"rewards/margins": 0.7622554898262024, |
|
"rewards/rejected": -1.6575870513916016, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 17.73830340247735, |
|
"learning_rate": 8.70394897454659e-08, |
|
"logits/chosen": -2.5352015495300293, |
|
"logits/rejected": -2.4226012229919434, |
|
"logps/chosen": -418.40753173828125, |
|
"logps/rejected": -462.5342712402344, |
|
"loss": 0.5235, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5649646520614624, |
|
"rewards/margins": 0.9579871296882629, |
|
"rewards/rejected": -1.5229518413543701, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7587650444793302, |
|
"grad_norm": 13.01616412507477, |
|
"learning_rate": 8.360203050172488e-08, |
|
"logits/chosen": -2.4504306316375732, |
|
"logits/rejected": -2.329036235809326, |
|
"logps/chosen": -399.3974914550781, |
|
"logps/rejected": -438.32818603515625, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0125949382781982, |
|
"rewards/margins": 0.7207821607589722, |
|
"rewards/rejected": -1.7333770990371704, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7639979068550498, |
|
"grad_norm": 13.253129244682603, |
|
"learning_rate": 8.022014682809305e-08, |
|
"logits/chosen": -2.3396387100219727, |
|
"logits/rejected": -2.3325138092041016, |
|
"logps/chosen": -308.99365234375, |
|
"logps/rejected": -367.69732666015625, |
|
"loss": 0.5394, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.9278551936149597, |
|
"rewards/margins": 0.49391236901283264, |
|
"rewards/rejected": -1.4217674732208252, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 18.366570255789817, |
|
"learning_rate": 7.689496824624525e-08, |
|
"logits/chosen": -2.4899001121520996, |
|
"logits/rejected": -2.309866428375244, |
|
"logps/chosen": -406.96929931640625, |
|
"logps/rejected": -396.9731140136719, |
|
"loss": 0.5086, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7201082706451416, |
|
"rewards/margins": 0.8545902967453003, |
|
"rewards/rejected": -1.5746986865997314, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7744636316064888, |
|
"grad_norm": 13.86884696195855, |
|
"learning_rate": 7.362760533881649e-08, |
|
"logits/chosen": -2.343902349472046, |
|
"logits/rejected": -2.3242199420928955, |
|
"logps/chosen": -354.43048095703125, |
|
"logps/rejected": -395.41046142578125, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.038723349571228, |
|
"rewards/margins": 0.5951145887374878, |
|
"rewards/rejected": -1.6338380575180054, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7796964939822083, |
|
"grad_norm": 14.609522681444743, |
|
"learning_rate": 7.041914937847584e-08, |
|
"logits/chosen": -2.1511263847351074, |
|
"logits/rejected": -2.1532607078552246, |
|
"logps/chosen": -405.44482421875, |
|
"logps/rejected": -449.9737243652344, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2105472087860107, |
|
"rewards/margins": 0.608808159828186, |
|
"rewards/rejected": -1.8193553686141968, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 14.859081561722336, |
|
"learning_rate": 6.727067196345099e-08, |
|
"logits/chosen": -2.2998688220977783, |
|
"logits/rejected": -2.241417646408081, |
|
"logps/chosen": -318.982666015625, |
|
"logps/rejected": -463.7603454589844, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0630351305007935, |
|
"rewards/margins": 0.9601357579231262, |
|
"rewards/rejected": -2.0231709480285645, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7901622187336473, |
|
"grad_norm": 14.807402173061881, |
|
"learning_rate": 6.418322465962233e-08, |
|
"logits/chosen": -2.297292470932007, |
|
"logits/rejected": -2.3181374073028564, |
|
"logps/chosen": -402.4325866699219, |
|
"logps/rejected": -505.2281799316406, |
|
"loss": 0.5628, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4203605651855469, |
|
"rewards/margins": 0.6741114258766174, |
|
"rewards/rejected": -2.0944721698760986, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7953950811093669, |
|
"grad_norm": 13.284587032601959, |
|
"learning_rate": 6.115783864930905e-08, |
|
"logits/chosen": -2.3440067768096924, |
|
"logits/rejected": -2.281280517578125, |
|
"logps/chosen": -314.564453125, |
|
"logps/rejected": -424.89276123046875, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8933273553848267, |
|
"rewards/margins": 0.8551291227340698, |
|
"rewards/rejected": -1.748456358909607, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8006279434850864, |
|
"grad_norm": 19.866705680642703, |
|
"learning_rate": 5.8195524386862374e-08, |
|
"logits/chosen": -2.5497729778289795, |
|
"logits/rejected": -2.4917826652526855, |
|
"logps/chosen": -439.1163024902344, |
|
"logps/rejected": -550.0157470703125, |
|
"loss": 0.5066, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1627197265625, |
|
"rewards/margins": 0.5966379046440125, |
|
"rewards/rejected": -1.7593576908111572, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8058608058608059, |
|
"grad_norm": 11.890318253432318, |
|
"learning_rate": 5.529727126118228e-08, |
|
"logits/chosen": -2.433413028717041, |
|
"logits/rejected": -2.440281391143799, |
|
"logps/chosen": -470.6192932128906, |
|
"logps/rejected": -471.0384826660156, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0888872146606445, |
|
"rewards/margins": 0.46668124198913574, |
|
"rewards/rejected": -1.5555684566497803, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8110936682365254, |
|
"grad_norm": 13.254578331194432, |
|
"learning_rate": 5.246404726526918e-08, |
|
"logits/chosen": -2.3911731243133545, |
|
"logits/rejected": -2.227848529815674, |
|
"logps/chosen": -392.9666442871094, |
|
"logps/rejected": -377.9125061035156, |
|
"loss": 0.5086, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6538218259811401, |
|
"rewards/margins": 0.85938560962677, |
|
"rewards/rejected": -1.5132074356079102, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 12.844163867391341, |
|
"learning_rate": 4.969679867292276e-08, |
|
"logits/chosen": -2.412761926651001, |
|
"logits/rejected": -2.2965760231018066, |
|
"logps/chosen": -444.131103515625, |
|
"logps/rejected": -450.3861389160156, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9285937547683716, |
|
"rewards/margins": 0.6477556824684143, |
|
"rewards/rejected": -1.5763494968414307, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8215593929879644, |
|
"grad_norm": 14.530993624956581, |
|
"learning_rate": 4.6996449722693315e-08, |
|
"logits/chosen": -2.3721063137054443, |
|
"logits/rejected": -2.2978971004486084, |
|
"logps/chosen": -322.4888916015625, |
|
"logps/rejected": -396.3763122558594, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8261367082595825, |
|
"rewards/margins": 0.668242335319519, |
|
"rewards/rejected": -1.4943790435791016, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.826792255363684, |
|
"grad_norm": 16.610549135261834, |
|
"learning_rate": 4.436390230919465e-08, |
|
"logits/chosen": -2.459463596343994, |
|
"logits/rejected": -2.288986921310425, |
|
"logps/chosen": -391.3236999511719, |
|
"logps/rejected": -407.36175537109375, |
|
"loss": 0.5778, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0118205547332764, |
|
"rewards/margins": 0.6633633375167847, |
|
"rewards/rejected": -1.6751840114593506, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8320251177394035, |
|
"grad_norm": 18.67993288793065, |
|
"learning_rate": 4.180003568187776e-08, |
|
"logits/chosen": -2.2385175228118896, |
|
"logits/rejected": -2.1284265518188477, |
|
"logps/chosen": -312.7702941894531, |
|
"logps/rejected": -377.21697998046875, |
|
"loss": 0.5226, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7135175466537476, |
|
"rewards/margins": 0.9779763221740723, |
|
"rewards/rejected": -1.6914939880371094, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"grad_norm": 13.670179286762004, |
|
"learning_rate": 3.930570615136919e-08, |
|
"logits/chosen": -2.253570318222046, |
|
"logits/rejected": -2.280837059020996, |
|
"logps/chosen": -371.60699462890625, |
|
"logps/rejected": -450.8907165527344, |
|
"loss": 0.5284, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1355136632919312, |
|
"rewards/margins": 0.5616965889930725, |
|
"rewards/rejected": -1.6972103118896484, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8424908424908425, |
|
"grad_norm": 15.269612395866806, |
|
"learning_rate": 3.6881746803469756e-08, |
|
"logits/chosen": -2.5094237327575684, |
|
"logits/rejected": -2.3776464462280273, |
|
"logps/chosen": -466.35345458984375, |
|
"logps/rejected": -485.9764709472656, |
|
"loss": 0.5524, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.878058910369873, |
|
"rewards/margins": 0.7664037346839905, |
|
"rewards/rejected": -1.6444625854492188, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 13.170206102162114, |
|
"learning_rate": 3.452896722091128e-08, |
|
"logits/chosen": -2.348895788192749, |
|
"logits/rejected": -2.2078306674957275, |
|
"logps/chosen": -403.678466796875, |
|
"logps/rejected": -406.2483215332031, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1748065948486328, |
|
"rewards/margins": 0.5404418706893921, |
|
"rewards/rejected": -1.715248465538025, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8529565672422815, |
|
"grad_norm": 18.443756767005762, |
|
"learning_rate": 3.2248153212961677e-08, |
|
"logits/chosen": -2.4805829524993896, |
|
"logits/rejected": -2.4803428649902344, |
|
"logps/chosen": -331.70599365234375, |
|
"logps/rejected": -399.79443359375, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8988332748413086, |
|
"rewards/margins": 0.6500404477119446, |
|
"rewards/rejected": -1.5488736629486084, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.858189429618001, |
|
"grad_norm": 14.502355149791981, |
|
"learning_rate": 3.004006655297209e-08, |
|
"logits/chosen": -2.4185843467712402, |
|
"logits/rejected": -2.38954496383667, |
|
"logps/chosen": -394.46649169921875, |
|
"logps/rejected": -446.54962158203125, |
|
"loss": 0.5485, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8174797296524048, |
|
"rewards/margins": 0.7895897626876831, |
|
"rewards/rejected": -1.6070693731307983, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8634222919937206, |
|
"grad_norm": 13.285869552228172, |
|
"learning_rate": 2.7905444723949762e-08, |
|
"logits/chosen": -2.323714017868042, |
|
"logits/rejected": -2.2698733806610107, |
|
"logps/chosen": -384.14202880859375, |
|
"logps/rejected": -437.5120544433594, |
|
"loss": 0.515, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0088014602661133, |
|
"rewards/margins": 0.5342134237289429, |
|
"rewards/rejected": -1.5430147647857666, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8686551543694401, |
|
"grad_norm": 15.176120265852116, |
|
"learning_rate": 2.5845000672245572e-08, |
|
"logits/chosen": -2.300856590270996, |
|
"logits/rejected": -2.2410037517547607, |
|
"logps/chosen": -304.0631103515625, |
|
"logps/rejected": -412.28448486328125, |
|
"loss": 0.5228, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8832157254219055, |
|
"rewards/margins": 0.8058843612670898, |
|
"rewards/rejected": -1.6891002655029297, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8738880167451596, |
|
"grad_norm": 17.935790291718803, |
|
"learning_rate": 2.385942256943499e-08, |
|
"logits/chosen": -2.490159273147583, |
|
"logits/rejected": -2.376840353012085, |
|
"logps/chosen": -378.88067626953125, |
|
"logps/rejected": -431.4586486816406, |
|
"loss": 0.5222, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1389634609222412, |
|
"rewards/margins": 0.6274768114089966, |
|
"rewards/rejected": -1.7664403915405273, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 14.534884954093348, |
|
"learning_rate": 2.194937358247506e-08, |
|
"logits/chosen": -2.35546612739563, |
|
"logits/rejected": -2.353252410888672, |
|
"logps/chosen": -349.008544921875, |
|
"logps/rejected": -443.03472900390625, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9100105166435242, |
|
"rewards/margins": 0.8192359805107117, |
|
"rewards/rejected": -1.7292464971542358, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8843537414965986, |
|
"grad_norm": 17.947386889864642, |
|
"learning_rate": 2.011549165221127e-08, |
|
"logits/chosen": -2.29172945022583, |
|
"logits/rejected": -2.232840061187744, |
|
"logps/chosen": -345.0833740234375, |
|
"logps/rejected": -404.31005859375, |
|
"loss": 0.512, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9045823216438293, |
|
"rewards/margins": 0.8525349497795105, |
|
"rewards/rejected": -1.7571172714233398, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8895866038723181, |
|
"grad_norm": 13.889156554037553, |
|
"learning_rate": 1.8358389280311303e-08, |
|
"logits/chosen": -2.3812906742095947, |
|
"logits/rejected": -2.3108513355255127, |
|
"logps/chosen": -378.1514892578125, |
|
"logps/rejected": -421.9300231933594, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0261753797531128, |
|
"rewards/margins": 0.6083909273147583, |
|
"rewards/rejected": -1.634566307067871, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8948194662480377, |
|
"grad_norm": 19.01447635034032, |
|
"learning_rate": 1.6678653324693787e-08, |
|
"logits/chosen": -2.455488681793213, |
|
"logits/rejected": -2.3439297676086426, |
|
"logps/chosen": -428.5733337402344, |
|
"logps/rejected": -486.60003662109375, |
|
"loss": 0.5159, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9137641787528992, |
|
"rewards/margins": 0.7838142514228821, |
|
"rewards/rejected": -1.6975784301757812, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9000523286237572, |
|
"grad_norm": 15.023722905938838, |
|
"learning_rate": 1.507684480352292e-08, |
|
"logits/chosen": -2.4375879764556885, |
|
"logits/rejected": -2.2770121097564697, |
|
"logps/chosen": -409.2210388183594, |
|
"logps/rejected": -397.143310546875, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1660840511322021, |
|
"rewards/margins": 0.30486029386520386, |
|
"rewards/rejected": -1.4709442853927612, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9052851909994767, |
|
"grad_norm": 15.448290425857392, |
|
"learning_rate": 1.3553498707832761e-08, |
|
"logits/chosen": -2.3308370113372803, |
|
"logits/rejected": -2.264747381210327, |
|
"logps/chosen": -321.4301452636719, |
|
"logps/rejected": -340.09716796875, |
|
"loss": 0.5141, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8954669833183289, |
|
"rewards/margins": 0.6659599542617798, |
|
"rewards/rejected": -1.5614268779754639, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 14.740162741391886, |
|
"learning_rate": 1.2109123822844653e-08, |
|
"logits/chosen": -2.394498348236084, |
|
"logits/rejected": -2.1879210472106934, |
|
"logps/chosen": -354.54718017578125, |
|
"logps/rejected": -371.6363525390625, |
|
"loss": 0.5496, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0082483291625977, |
|
"rewards/margins": 0.5861491560935974, |
|
"rewards/rejected": -1.5943976640701294, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9157509157509157, |
|
"grad_norm": 13.588571082537147, |
|
"learning_rate": 1.0744202558037014e-08, |
|
"logits/chosen": -2.5192902088165283, |
|
"logits/rejected": -2.4645519256591797, |
|
"logps/chosen": -421.0448303222656, |
|
"logps/rejected": -444.0043029785156, |
|
"loss": 0.57, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9191666841506958, |
|
"rewards/margins": 0.6467918157577515, |
|
"rewards/rejected": -1.5659583806991577, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9209837781266352, |
|
"grad_norm": 13.390219718345612, |
|
"learning_rate": 9.459190786024696e-09, |
|
"logits/chosen": -2.4041290283203125, |
|
"logits/rejected": -2.3528878688812256, |
|
"logps/chosen": -316.8312072753906, |
|
"logps/rejected": -348.93280029296875, |
|
"loss": 0.4842, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8934735059738159, |
|
"rewards/margins": 0.47905582189559937, |
|
"rewards/rejected": -1.3725292682647705, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9262166405023547, |
|
"grad_norm": 13.510861802788243, |
|
"learning_rate": 8.254517690300944e-09, |
|
"logits/chosen": -2.243380069732666, |
|
"logits/rejected": -2.150890827178955, |
|
"logps/chosen": -395.6463317871094, |
|
"logps/rejected": -426.24072265625, |
|
"loss": 0.517, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.962546706199646, |
|
"rewards/margins": 0.7899658679962158, |
|
"rewards/rejected": -1.7525125741958618, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9314495028780743, |
|
"grad_norm": 13.608423691071376, |
|
"learning_rate": 7.130585621893809e-09, |
|
"logits/chosen": -2.2718772888183594, |
|
"logits/rejected": -2.2424051761627197, |
|
"logps/chosen": -350.9353942871094, |
|
"logps/rejected": -369.8668518066406, |
|
"loss": 0.5475, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0604169368743896, |
|
"rewards/margins": 0.5255022048950195, |
|
"rewards/rejected": -1.5859191417694092, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9366823652537938, |
|
"grad_norm": 16.606480964389576, |
|
"learning_rate": 6.0877699649840574e-09, |
|
"logits/chosen": -2.3981852531433105, |
|
"logits/rejected": -2.408480405807495, |
|
"logps/chosen": -417.22412109375, |
|
"logps/rejected": -467.68914794921875, |
|
"loss": 0.5326, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8352551460266113, |
|
"rewards/margins": 0.5547670125961304, |
|
"rewards/rejected": -1.3900221586227417, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 18.64083446169978, |
|
"learning_rate": 5.126419011529992e-09, |
|
"logits/chosen": -2.1910786628723145, |
|
"logits/rejected": -2.1055684089660645, |
|
"logps/chosen": -336.47723388671875, |
|
"logps/rejected": -470.904052734375, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.0156116485595703, |
|
"rewards/margins": 1.038367509841919, |
|
"rewards/rejected": -2.0539793968200684, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9471480900052328, |
|
"grad_norm": 16.538041075880063, |
|
"learning_rate": 4.246853844940723e-09, |
|
"logits/chosen": -2.4223251342773438, |
|
"logits/rejected": -2.3221535682678223, |
|
"logps/chosen": -354.92938232421875, |
|
"logps/rejected": -380.63970947265625, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.949341893196106, |
|
"rewards/margins": 0.6054549217224121, |
|
"rewards/rejected": -1.5547969341278076, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 17.397548792426996, |
|
"learning_rate": 3.449368232836869e-09, |
|
"logits/chosen": -2.2566580772399902, |
|
"logits/rejected": -2.165665864944458, |
|
"logps/chosen": -301.8537292480469, |
|
"logps/rejected": -334.85931396484375, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8274661898612976, |
|
"rewards/margins": 0.6850829124450684, |
|
"rewards/rejected": -1.5125491619110107, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.957613814756672, |
|
"grad_norm": 18.389752274514752, |
|
"learning_rate": 2.734228528934679e-09, |
|
"logits/chosen": -2.465012550354004, |
|
"logits/rejected": -2.3980252742767334, |
|
"logps/chosen": -375.5146484375, |
|
"logps/rejected": -390.62469482421875, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8783760070800781, |
|
"rewards/margins": 0.6250108480453491, |
|
"rewards/rejected": -1.5033868551254272, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9628466771323915, |
|
"grad_norm": 16.01702968300774, |
|
"learning_rate": 2.1016735840859447e-09, |
|
"logits/chosen": -2.4276375770568848, |
|
"logits/rejected": -2.2194676399230957, |
|
"logps/chosen": -442.44744873046875, |
|
"logps/rejected": -459.9623107910156, |
|
"loss": 0.527, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0679312944412231, |
|
"rewards/margins": 0.8163121938705444, |
|
"rewards/rejected": -1.8842436075210571, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.968079539508111, |
|
"grad_norm": 14.310624305561785, |
|
"learning_rate": 1.551914666503812e-09, |
|
"logits/chosen": -2.3774361610412598, |
|
"logits/rejected": -2.3170013427734375, |
|
"logps/chosen": -454.64312744140625, |
|
"logps/rejected": -431.9732360839844, |
|
"loss": 0.5229, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7297849059104919, |
|
"rewards/margins": 0.5755053758621216, |
|
"rewards/rejected": -1.3052901029586792, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 13.728570350303638, |
|
"learning_rate": 1.0851353912008642e-09, |
|
"logits/chosen": -2.374072551727295, |
|
"logits/rejected": -2.189120292663574, |
|
"logps/chosen": -431.0322265625, |
|
"logps/rejected": -404.7000732421875, |
|
"loss": 0.5096, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0377578735351562, |
|
"rewards/margins": 0.6482242345809937, |
|
"rewards/rejected": -1.685982346534729, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.97854526425955, |
|
"grad_norm": 18.53002581870753, |
|
"learning_rate": 7.014916586632336e-10, |
|
"logits/chosen": -2.373727798461914, |
|
"logits/rejected": -2.255896806716919, |
|
"logps/chosen": -331.47845458984375, |
|
"logps/rejected": -374.38616943359375, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.79266357421875, |
|
"rewards/margins": 0.5838620662689209, |
|
"rewards/rejected": -1.376525640487671, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9837781266352695, |
|
"grad_norm": 11.696499734488652, |
|
"learning_rate": 4.011116027811956e-10, |
|
"logits/chosen": -2.4176530838012695, |
|
"logits/rejected": -2.489184856414795, |
|
"logps/chosen": -343.1055908203125, |
|
"logps/rejected": -467.28607177734375, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8438282012939453, |
|
"rewards/margins": 0.6649068593978882, |
|
"rewards/rejected": -1.508734941482544, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.989010989010989, |
|
"grad_norm": 16.467829285757585, |
|
"learning_rate": 1.840955480532924e-10, |
|
"logits/chosen": -2.4320759773254395, |
|
"logits/rejected": -2.360839366912842, |
|
"logps/chosen": -482.7928161621094, |
|
"logps/rejected": -476.65509033203125, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8602573275566101, |
|
"rewards/margins": 0.5700306296348572, |
|
"rewards/rejected": -1.4302880764007568, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9942438513867086, |
|
"grad_norm": 17.167167673829663, |
|
"learning_rate": 5.051597607894087e-11, |
|
"logits/chosen": -2.442420482635498, |
|
"logits/rejected": -2.2953944206237793, |
|
"logps/chosen": -327.2977294921875, |
|
"logps/rejected": -396.27789306640625, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9869822263717651, |
|
"rewards/margins": 0.7807688117027283, |
|
"rewards/rejected": -1.7677509784698486, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9994767137624281, |
|
"grad_norm": 17.181928485699192, |
|
"learning_rate": 4.1750135001961117e-13, |
|
"logits/chosen": -2.355985164642334, |
|
"logits/rejected": -2.3409178256988525, |
|
"logps/chosen": -411.2369079589844, |
|
"logps/rejected": -506.39312744140625, |
|
"loss": 0.5193, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7699571251869202, |
|
"rewards/margins": 1.0011080503463745, |
|
"rewards/rejected": -1.771065354347229, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1911, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5690948443535047, |
|
"train_runtime": 12671.4295, |
|
"train_samples_per_second": 4.825, |
|
"train_steps_per_second": 0.151 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1911, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|