|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 500, |
|
"global_step": 1910, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005234231876472127, |
|
"grad_norm": 14.640187733663568, |
|
"learning_rate": 2.617801047120419e-09, |
|
"logits/chosen": -2.959083080291748, |
|
"logits/rejected": -3.0469329357147217, |
|
"logps/chosen": -134.99343872070312, |
|
"logps/rejected": -161.30029296875, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 0.0001932144077727571, |
|
"rewards/margins": 0.0005058574606664479, |
|
"rewards/rejected": -0.00031264303834177554, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005234231876472127, |
|
"grad_norm": 14.254383428129056, |
|
"learning_rate": 2.6178010471204188e-08, |
|
"logits/chosen": -2.9816136360168457, |
|
"logits/rejected": -2.893190860748291, |
|
"logps/chosen": -186.0762176513672, |
|
"logps/rejected": -186.07012939453125, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 0.0004193554923404008, |
|
"rewards/margins": -0.00021269405260682106, |
|
"rewards/rejected": 0.0006320495158433914, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 12.949573017221987, |
|
"learning_rate": 5.2356020942408376e-08, |
|
"logits/chosen": -2.829813003540039, |
|
"logits/rejected": -2.8478147983551025, |
|
"logps/chosen": -174.7886199951172, |
|
"logps/rejected": -193.36080932617188, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.0009858906269073486, |
|
"rewards/margins": -0.0011966052697971463, |
|
"rewards/rejected": 0.00021071455557830632, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015702695629416383, |
|
"grad_norm": 15.371844314146115, |
|
"learning_rate": 7.853403141361257e-08, |
|
"logits/chosen": -3.01656174659729, |
|
"logits/rejected": -2.9938042163848877, |
|
"logps/chosen": -214.79541015625, |
|
"logps/rejected": -201.54867553710938, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.000996897229924798, |
|
"rewards/margins": 0.00021716937771998346, |
|
"rewards/rejected": -0.00121406652033329, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 14.22366978101997, |
|
"learning_rate": 1.0471204188481675e-07, |
|
"logits/chosen": -2.874187469482422, |
|
"logits/rejected": -2.886640787124634, |
|
"logps/chosen": -172.05311584472656, |
|
"logps/rejected": -216.9876251220703, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.002353773918002844, |
|
"rewards/margins": 0.0017049995949491858, |
|
"rewards/rejected": -0.004058773163706064, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"grad_norm": 13.356014407693477, |
|
"learning_rate": 1.3089005235602092e-07, |
|
"logits/chosen": -2.9540908336639404, |
|
"logits/rejected": -2.9672648906707764, |
|
"logps/chosen": -196.1494598388672, |
|
"logps/rejected": -180.84803771972656, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.00872573908418417, |
|
"rewards/margins": 0.002879455918446183, |
|
"rewards/rejected": -0.01160519476979971, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 14.564330446769505, |
|
"learning_rate": 1.5706806282722514e-07, |
|
"logits/chosen": -2.9785428047180176, |
|
"logits/rejected": -2.9725818634033203, |
|
"logps/chosen": -183.6147918701172, |
|
"logps/rejected": -203.66824340820312, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.01978887803852558, |
|
"rewards/margins": 0.005196017678827047, |
|
"rewards/rejected": -0.024984892457723618, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.036639623135304895, |
|
"grad_norm": 16.449869342928505, |
|
"learning_rate": 1.8324607329842932e-07, |
|
"logits/chosen": -2.995837688446045, |
|
"logits/rejected": -2.8539786338806152, |
|
"logps/chosen": -197.07168579101562, |
|
"logps/rejected": -206.65469360351562, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.02455015294253826, |
|
"rewards/margins": 0.011808360926806927, |
|
"rewards/rejected": -0.03635851666331291, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 14.79926563160046, |
|
"learning_rate": 2.094240837696335e-07, |
|
"logits/chosen": -2.9914395809173584, |
|
"logits/rejected": -2.942915439605713, |
|
"logps/chosen": -205.7972869873047, |
|
"logps/rejected": -192.72232055664062, |
|
"loss": 0.6881, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.04919552057981491, |
|
"rewards/margins": 0.009290899150073528, |
|
"rewards/rejected": -0.058486420661211014, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04710808688824915, |
|
"grad_norm": 14.05004280238572, |
|
"learning_rate": 2.356020942408377e-07, |
|
"logits/chosen": -2.888996124267578, |
|
"logits/rejected": -2.804115056991577, |
|
"logps/chosen": -184.47732543945312, |
|
"logps/rejected": -172.6226348876953, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.07440824806690216, |
|
"rewards/margins": 0.013659747317433357, |
|
"rewards/rejected": -0.08806798607110977, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 13.559496262792273, |
|
"learning_rate": 2.6178010471204185e-07, |
|
"logits/chosen": -2.9032299518585205, |
|
"logits/rejected": -2.857269525527954, |
|
"logps/chosen": -206.85140991210938, |
|
"logps/rejected": -219.37026977539062, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.0838988870382309, |
|
"rewards/margins": 0.028635507449507713, |
|
"rewards/rejected": -0.11253438144922256, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05757655064119341, |
|
"grad_norm": 13.4996248822944, |
|
"learning_rate": 2.879581151832461e-07, |
|
"logits/chosen": -2.940826892852783, |
|
"logits/rejected": -2.8834738731384277, |
|
"logps/chosen": -212.55807495117188, |
|
"logps/rejected": -201.2547607421875, |
|
"loss": 0.675, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.10653187334537506, |
|
"rewards/margins": 0.05816306918859482, |
|
"rewards/rejected": -0.16469493508338928, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 12.884354544368191, |
|
"learning_rate": 3.1413612565445027e-07, |
|
"logits/chosen": -2.927412986755371, |
|
"logits/rejected": -2.985417604446411, |
|
"logps/chosen": -228.93350219726562, |
|
"logps/rejected": -238.04269409179688, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.10475633293390274, |
|
"rewards/margins": 0.046889565885066986, |
|
"rewards/rejected": -0.15164589881896973, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06804501439413765, |
|
"grad_norm": 14.312004568183767, |
|
"learning_rate": 3.4031413612565446e-07, |
|
"logits/chosen": -2.8365414142608643, |
|
"logits/rejected": -2.876112461090088, |
|
"logps/chosen": -200.61264038085938, |
|
"logps/rejected": -213.1061248779297, |
|
"loss": 0.6622, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.13382001221179962, |
|
"rewards/margins": 0.06009231135249138, |
|
"rewards/rejected": -0.1939123272895813, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 15.573435236138954, |
|
"learning_rate": 3.6649214659685864e-07, |
|
"logits/chosen": -2.8712472915649414, |
|
"logits/rejected": -2.8083159923553467, |
|
"logps/chosen": -212.3485870361328, |
|
"logps/rejected": -216.5910186767578, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.2070785015821457, |
|
"rewards/margins": 0.06957153975963593, |
|
"rewards/rejected": -0.27665001153945923, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"grad_norm": 17.962582071011855, |
|
"learning_rate": 3.926701570680628e-07, |
|
"logits/chosen": -2.8894333839416504, |
|
"logits/rejected": -2.783268928527832, |
|
"logps/chosen": -211.5115509033203, |
|
"logps/rejected": -224.9191131591797, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.206833153963089, |
|
"rewards/margins": 0.1691361963748932, |
|
"rewards/rejected": -0.3759693503379822, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 17.592745804055944, |
|
"learning_rate": 4.18848167539267e-07, |
|
"logits/chosen": -2.866983413696289, |
|
"logits/rejected": -2.766951084136963, |
|
"logps/chosen": -217.34207153320312, |
|
"logps/rejected": -228.534912109375, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.25625622272491455, |
|
"rewards/margins": 0.15949755907058716, |
|
"rewards/rejected": -0.4157537817955017, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08898194190002617, |
|
"grad_norm": 17.652878273699034, |
|
"learning_rate": 4.450261780104712e-07, |
|
"logits/chosen": -2.8391916751861572, |
|
"logits/rejected": -2.832740306854248, |
|
"logps/chosen": -199.69781494140625, |
|
"logps/rejected": -227.84652709960938, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13288532197475433, |
|
"rewards/margins": 0.19410356879234314, |
|
"rewards/rejected": -0.32698890566825867, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 17.777586322420262, |
|
"learning_rate": 4.712041884816754e-07, |
|
"logits/chosen": -2.715904951095581, |
|
"logits/rejected": -2.5992846488952637, |
|
"logps/chosen": -210.23941040039062, |
|
"logps/rejected": -229.0234832763672, |
|
"loss": 0.6307, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.21204690635204315, |
|
"rewards/margins": 0.17581574618816376, |
|
"rewards/rejected": -0.3878627121448517, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09945040565297043, |
|
"grad_norm": 28.072972397122147, |
|
"learning_rate": 4.973821989528796e-07, |
|
"logits/chosen": -2.6811397075653076, |
|
"logits/rejected": -2.6616740226745605, |
|
"logps/chosen": -216.45114135742188, |
|
"logps/rejected": -258.03021240234375, |
|
"loss": 0.6089, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.33117493987083435, |
|
"rewards/margins": 0.33262476325035095, |
|
"rewards/rejected": -0.6637996435165405, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 29.910313896117426, |
|
"learning_rate": 4.999661831436498e-07, |
|
"logits/chosen": -2.5953736305236816, |
|
"logits/rejected": -2.6287741661071777, |
|
"logps/chosen": -201.4033966064453, |
|
"logps/rejected": -260.02728271484375, |
|
"loss": 0.6132, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.2563866972923279, |
|
"rewards/margins": 0.3042460083961487, |
|
"rewards/rejected": -0.5606327652931213, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10991886940591468, |
|
"grad_norm": 35.27450975604578, |
|
"learning_rate": 4.998492971140339e-07, |
|
"logits/chosen": -2.543915271759033, |
|
"logits/rejected": -2.3116703033447266, |
|
"logps/chosen": -247.22891235351562, |
|
"logps/rejected": -270.3451843261719, |
|
"loss": 0.6417, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.595511794090271, |
|
"rewards/margins": 0.2611444592475891, |
|
"rewards/rejected": -0.8566562533378601, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 48.58365341922755, |
|
"learning_rate": 4.996489634487865e-07, |
|
"logits/chosen": -2.545841932296753, |
|
"logits/rejected": -2.2179739475250244, |
|
"logps/chosen": -234.15786743164062, |
|
"logps/rejected": -287.24920654296875, |
|
"loss": 0.585, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.533110499382019, |
|
"rewards/margins": 0.5109399557113647, |
|
"rewards/rejected": -1.0440504550933838, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12038733315885894, |
|
"grad_norm": 75.83009735431122, |
|
"learning_rate": 4.993652490577246e-07, |
|
"logits/chosen": -2.5557217597961426, |
|
"logits/rejected": -2.3648390769958496, |
|
"logps/chosen": -249.6676483154297, |
|
"logps/rejected": -277.823486328125, |
|
"loss": 0.6025, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.5915195941925049, |
|
"rewards/margins": 0.36318284273147583, |
|
"rewards/rejected": -0.9547024965286255, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 33.52383186890237, |
|
"learning_rate": 4.9899824869915e-07, |
|
"logits/chosen": -2.333035945892334, |
|
"logits/rejected": -2.079817295074463, |
|
"logps/chosen": -255.44595336914062, |
|
"logps/rejected": -313.8713684082031, |
|
"loss": 0.5681, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8247841000556946, |
|
"rewards/margins": 0.47075214982032776, |
|
"rewards/rejected": -1.2955362796783447, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"grad_norm": 35.13466621709411, |
|
"learning_rate": 4.985480849482012e-07, |
|
"logits/chosen": -2.3787760734558105, |
|
"logits/rejected": -2.364150047302246, |
|
"logps/chosen": -304.33441162109375, |
|
"logps/rejected": -327.4510498046875, |
|
"loss": 0.5645, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0099525451660156, |
|
"rewards/margins": 0.29796892404556274, |
|
"rewards/rejected": -1.3079214096069336, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 33.359619674605376, |
|
"learning_rate": 4.980149081559142e-07, |
|
"logits/chosen": -2.2392945289611816, |
|
"logits/rejected": -2.0072715282440186, |
|
"logps/chosen": -299.57879638671875, |
|
"logps/rejected": -395.2071838378906, |
|
"loss": 0.5998, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3633458614349365, |
|
"rewards/margins": 0.728509247303009, |
|
"rewards/rejected": -2.09185528755188, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14132426066474746, |
|
"grad_norm": 47.681686539836825, |
|
"learning_rate": 4.973988963990065e-07, |
|
"logits/chosen": -2.056598424911499, |
|
"logits/rejected": -2.0986738204956055, |
|
"logps/chosen": -318.61956787109375, |
|
"logps/rejected": -412.1875915527344, |
|
"loss": 0.548, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.372604489326477, |
|
"rewards/margins": 0.7224138975143433, |
|
"rewards/rejected": -2.095018148422241, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 30.114115223768707, |
|
"learning_rate": 4.967002554204008e-07, |
|
"logits/chosen": -2.0974724292755127, |
|
"logits/rejected": -2.0098748207092285, |
|
"logps/chosen": -350.09600830078125, |
|
"logps/rejected": -403.1016540527344, |
|
"loss": 0.5879, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.6619269847869873, |
|
"rewards/margins": 0.39111390709877014, |
|
"rewards/rejected": -2.0530409812927246, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1517927244176917, |
|
"grad_norm": 31.769810828425065, |
|
"learning_rate": 4.959192185605087e-07, |
|
"logits/chosen": -2.51796817779541, |
|
"logits/rejected": -2.2334158420562744, |
|
"logps/chosen": -324.0982360839844, |
|
"logps/rejected": -340.4048767089844, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.1041514873504639, |
|
"rewards/margins": 0.38266053795814514, |
|
"rewards/rejected": -1.486811876296997, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 48.774597859411024, |
|
"learning_rate": 4.950560466792969e-07, |
|
"logits/chosen": -2.362421751022339, |
|
"logits/rejected": -2.193986654281616, |
|
"logps/chosen": -305.4976806640625, |
|
"logps/rejected": -367.10467529296875, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.2003720998764038, |
|
"rewards/margins": 0.572384774684906, |
|
"rewards/rejected": -1.7727569341659546, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16226118817063595, |
|
"grad_norm": 39.19160116710573, |
|
"learning_rate": 4.941110280691619e-07, |
|
"logits/chosen": -2.353766441345215, |
|
"logits/rejected": -2.0096635818481445, |
|
"logps/chosen": -309.58905029296875, |
|
"logps/rejected": -353.7257080078125, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.201560378074646, |
|
"rewards/margins": 0.48785895109176636, |
|
"rewards/rejected": -1.6894193887710571, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 37.587189222436905, |
|
"learning_rate": 4.930844783586424e-07, |
|
"logits/chosen": -2.396035671234131, |
|
"logits/rejected": -2.165098190307617, |
|
"logps/chosen": -288.3567810058594, |
|
"logps/rejected": -332.8038635253906, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.062434196472168, |
|
"rewards/margins": 0.4423864781856537, |
|
"rewards/rejected": -1.5048205852508545, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17272965192358022, |
|
"grad_norm": 50.8199153591931, |
|
"learning_rate": 4.919767404070033e-07, |
|
"logits/chosen": -1.8470354080200195, |
|
"logits/rejected": -1.600237250328064, |
|
"logps/chosen": -307.25360107421875, |
|
"logps/rejected": -398.39910888671875, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3850140571594238, |
|
"rewards/margins": 0.7685943245887756, |
|
"rewards/rejected": -2.153608560562134, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 39.2860888155997, |
|
"learning_rate": 4.907881841897216e-07, |
|
"logits/chosen": -2.0841307640075684, |
|
"logits/rejected": -1.8823187351226807, |
|
"logps/chosen": -306.69305419921875, |
|
"logps/rejected": -375.80352783203125, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2261284589767456, |
|
"rewards/margins": 0.7133340835571289, |
|
"rewards/rejected": -1.939462423324585, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"grad_norm": 44.441009515641795, |
|
"learning_rate": 4.895192066749189e-07, |
|
"logits/chosen": -2.40903902053833, |
|
"logits/rejected": -2.2892374992370605, |
|
"logps/chosen": -299.1591796875, |
|
"logps/rejected": -401.603271484375, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1807595491409302, |
|
"rewards/margins": 0.7055211663246155, |
|
"rewards/rejected": -1.8862807750701904, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 33.989270442301404, |
|
"learning_rate": 4.881702316907768e-07, |
|
"logits/chosen": -2.2026100158691406, |
|
"logits/rejected": -2.008274793624878, |
|
"logps/chosen": -290.2915344238281, |
|
"logps/rejected": -346.9622497558594, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1741074323654175, |
|
"rewards/margins": 0.5104838609695435, |
|
"rewards/rejected": -1.68459153175354, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19366657942946872, |
|
"grad_norm": 39.74252231849878, |
|
"learning_rate": 4.86741709783982e-07, |
|
"logits/chosen": -2.0070841312408447, |
|
"logits/rejected": -1.5279141664505005, |
|
"logps/chosen": -293.36578369140625, |
|
"logps/rejected": -345.587646484375, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2320070266723633, |
|
"rewards/margins": 0.6169141530990601, |
|
"rewards/rejected": -1.8489214181900024, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 41.66915169536549, |
|
"learning_rate": 4.85234118069247e-07, |
|
"logits/chosen": -1.4260574579238892, |
|
"logits/rejected": -1.1182783842086792, |
|
"logps/chosen": -323.9841003417969, |
|
"logps/rejected": -404.05517578125, |
|
"loss": 0.5282, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.595336675643921, |
|
"rewards/margins": 0.6996889114379883, |
|
"rewards/rejected": -2.295025587081909, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.204135043182413, |
|
"grad_norm": 38.40542199265962, |
|
"learning_rate": 4.836479600699578e-07, |
|
"logits/chosen": -1.7142446041107178, |
|
"logits/rejected": -1.7196638584136963, |
|
"logps/chosen": -361.3770446777344, |
|
"logps/rejected": -439.6658630371094, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.594753623008728, |
|
"rewards/margins": 0.683542788028717, |
|
"rewards/rejected": -2.27829647064209, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 39.63693507119565, |
|
"learning_rate": 4.819837655500013e-07, |
|
"logits/chosen": -1.911871314048767, |
|
"logits/rejected": -1.7180120944976807, |
|
"logps/chosen": -339.29705810546875, |
|
"logps/rejected": -399.7021179199219, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.528294563293457, |
|
"rewards/margins": 0.6349833011627197, |
|
"rewards/rejected": -2.163278102874756, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21460350693535724, |
|
"grad_norm": 39.95185592857185, |
|
"learning_rate": 4.802420903368285e-07, |
|
"logits/chosen": -1.7220875024795532, |
|
"logits/rejected": -1.3575185537338257, |
|
"logps/chosen": -365.69964599609375, |
|
"logps/rejected": -418.9811096191406, |
|
"loss": 0.5823, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7411553859710693, |
|
"rewards/margins": 0.48729467391967773, |
|
"rewards/rejected": -2.228450059890747, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 42.46558644572104, |
|
"learning_rate": 4.784235161358123e-07, |
|
"logits/chosen": -1.8128083944320679, |
|
"logits/rejected": -1.8185402154922485, |
|
"logps/chosen": -309.54937744140625, |
|
"logps/rejected": -385.9242248535156, |
|
"loss": 0.5788, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3886371850967407, |
|
"rewards/margins": 0.5779408812522888, |
|
"rewards/rejected": -1.9665781259536743, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22507197068830148, |
|
"grad_norm": 40.391730044431704, |
|
"learning_rate": 4.7652865033596314e-07, |
|
"logits/chosen": -2.0464603900909424, |
|
"logits/rejected": -1.5721585750579834, |
|
"logps/chosen": -317.18560791015625, |
|
"logps/rejected": -354.8387756347656, |
|
"loss": 0.5339, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.305790662765503, |
|
"rewards/margins": 0.5792006254196167, |
|
"rewards/rejected": -1.8849912881851196, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 44.15647862665091, |
|
"learning_rate": 4.7455812580706534e-07, |
|
"logits/chosen": -2.0872130393981934, |
|
"logits/rejected": -1.5626317262649536, |
|
"logps/chosen": -345.29278564453125, |
|
"logps/rejected": -409.45989990234375, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5832579135894775, |
|
"rewards/margins": 0.6660935282707214, |
|
"rewards/rejected": -2.2493512630462646, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"grad_norm": 42.04086790745509, |
|
"learning_rate": 4.725126006883046e-07, |
|
"logits/chosen": -1.773993730545044, |
|
"logits/rejected": -1.5137646198272705, |
|
"logps/chosen": -326.9532165527344, |
|
"logps/rejected": -401.57867431640625, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5454232692718506, |
|
"rewards/margins": 0.6505107879638672, |
|
"rewards/rejected": -2.1959340572357178, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 36.31044897637686, |
|
"learning_rate": 4.703927581684539e-07, |
|
"logits/chosen": -2.1727023124694824, |
|
"logits/rejected": -1.8378498554229736, |
|
"logps/chosen": -350.1428527832031, |
|
"logps/rejected": -373.5480041503906, |
|
"loss": 0.5687, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.4619617462158203, |
|
"rewards/margins": 0.49050265550613403, |
|
"rewards/rejected": -1.9524643421173096, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24600889819419, |
|
"grad_norm": 38.97906947550607, |
|
"learning_rate": 4.68199306257695e-07, |
|
"logits/chosen": -1.960343599319458, |
|
"logits/rejected": -1.3587312698364258, |
|
"logps/chosen": -331.9888916015625, |
|
"logps/rejected": -399.2967834472656, |
|
"loss": 0.4892, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.4141899347305298, |
|
"rewards/margins": 0.7412781715393066, |
|
"rewards/rejected": -2.155468225479126, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 60.96840256666392, |
|
"learning_rate": 4.6593297755114776e-07, |
|
"logits/chosen": -1.4715737104415894, |
|
"logits/rejected": -0.8570035099983215, |
|
"logps/chosen": -363.36376953125, |
|
"logps/rejected": -419.7437438964844, |
|
"loss": 0.5466, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9223047494888306, |
|
"rewards/margins": 0.5888181328773499, |
|
"rewards/rejected": -2.511122941970825, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2564773619471343, |
|
"grad_norm": 56.85832650681642, |
|
"learning_rate": 4.635945289841902e-07, |
|
"logits/chosen": -1.736699104309082, |
|
"logits/rejected": -1.6159862279891968, |
|
"logps/chosen": -331.16339111328125, |
|
"logps/rejected": -429.3055114746094, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.511039137840271, |
|
"rewards/margins": 0.6905651092529297, |
|
"rewards/rejected": -2.2016043663024902, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 45.91788216290567, |
|
"learning_rate": 4.611847415796476e-07, |
|
"logits/chosen": -1.6824716329574585, |
|
"logits/rejected": -1.536476731300354, |
|
"logps/chosen": -365.19488525390625, |
|
"logps/rejected": -471.90985107421875, |
|
"loss": 0.5294, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.727085828781128, |
|
"rewards/margins": 0.8007200956344604, |
|
"rewards/rejected": -2.527806043624878, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"eval_logits/chosen": -0.863903820514679, |
|
"eval_logits/rejected": -0.4723878502845764, |
|
"eval_logps/chosen": -358.6011047363281, |
|
"eval_logps/rejected": -428.4360656738281, |
|
"eval_loss": 0.5469957590103149, |
|
"eval_rewards/accuracies": 0.6980000138282776, |
|
"eval_rewards/chosen": -1.7358005046844482, |
|
"eval_rewards/margins": 0.6566510796546936, |
|
"eval_rewards/rejected": -2.392451763153076, |
|
"eval_runtime": 316.8135, |
|
"eval_samples_per_second": 6.313, |
|
"eval_steps_per_second": 0.789, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2669458257000785, |
|
"grad_norm": 37.615935219623054, |
|
"learning_rate": 4.5870442018693773e-07, |
|
"logits/chosen": -1.8111470937728882, |
|
"logits/rejected": -1.5349056720733643, |
|
"logps/chosen": -327.2267761230469, |
|
"logps/rejected": -381.962890625, |
|
"loss": 0.5606, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.547119140625, |
|
"rewards/margins": 0.5296986699104309, |
|
"rewards/rejected": -2.076817750930786, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 35.85517663702724, |
|
"learning_rate": 4.5615439321325735e-07, |
|
"logits/chosen": -2.0327141284942627, |
|
"logits/rejected": -1.460468053817749, |
|
"logps/chosen": -312.06671142578125, |
|
"logps/rejected": -395.5126647949219, |
|
"loss": 0.5241, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.3687623739242554, |
|
"rewards/margins": 0.6768960952758789, |
|
"rewards/rejected": -2.045658588409424, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27741428945302277, |
|
"grad_norm": 50.37074560251419, |
|
"learning_rate": 4.535355123469008e-07, |
|
"logits/chosen": -1.4753310680389404, |
|
"logits/rejected": -1.471920132637024, |
|
"logps/chosen": -338.68060302734375, |
|
"logps/rejected": -424.1499938964844, |
|
"loss": 0.545, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.7495324611663818, |
|
"rewards/margins": 0.5848132371902466, |
|
"rewards/rejected": -2.334345579147339, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 55.36008562841294, |
|
"learning_rate": 4.5084865227280366e-07, |
|
"logits/chosen": -1.087989091873169, |
|
"logits/rejected": -1.2594499588012695, |
|
"logps/chosen": -424.70941162109375, |
|
"logps/rejected": -481.74688720703125, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.1392130851745605, |
|
"rewards/margins": 0.6303030252456665, |
|
"rewards/rejected": -2.7695162296295166, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"grad_norm": 39.75849189232047, |
|
"learning_rate": 4.4809471038040437e-07, |
|
"logits/chosen": -1.642165184020996, |
|
"logits/rejected": -0.9477740526199341, |
|
"logps/chosen": -399.1435546875, |
|
"logps/rejected": -444.47882080078125, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8148092031478882, |
|
"rewards/margins": 0.7540385127067566, |
|
"rewards/rejected": -2.568847894668579, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 33.49141951717591, |
|
"learning_rate": 4.4527460646392386e-07, |
|
"logits/chosen": -2.0694565773010254, |
|
"logits/rejected": -1.7511459589004517, |
|
"logps/chosen": -310.82574462890625, |
|
"logps/rejected": -369.46539306640625, |
|
"loss": 0.5329, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1495193243026733, |
|
"rewards/margins": 0.6521326303482056, |
|
"rewards/rejected": -1.801652193069458, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29835121695891126, |
|
"grad_norm": 56.88589774049656, |
|
"learning_rate": 4.4238928241516163e-07, |
|
"logits/chosen": -1.8898957967758179, |
|
"logits/rejected": -1.2891989946365356, |
|
"logps/chosen": -350.3455810546875, |
|
"logps/rejected": -402.1921081542969, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.413482427597046, |
|
"rewards/margins": 0.6170186996459961, |
|
"rewards/rejected": -2.030501127243042, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 42.10520204001843, |
|
"learning_rate": 4.394397019089116e-07, |
|
"logits/chosen": -1.6958551406860352, |
|
"logits/rejected": -0.6804537773132324, |
|
"logps/chosen": -387.3909606933594, |
|
"logps/rejected": -451.7494201660156, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.97348952293396, |
|
"rewards/margins": 0.7865778207778931, |
|
"rewards/rejected": -2.7600669860839844, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.30881968071185556, |
|
"grad_norm": 35.07476918232387, |
|
"learning_rate": 4.3642685008110246e-07, |
|
"logits/chosen": -1.4044368267059326, |
|
"logits/rejected": -0.7555860280990601, |
|
"logps/chosen": -428.0267639160156, |
|
"logps/rejected": -496.0921936035156, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.2372324466705322, |
|
"rewards/margins": 0.8896536827087402, |
|
"rewards/rejected": -3.1268858909606934, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 56.64712231153677, |
|
"learning_rate": 4.333517331997704e-07, |
|
"logits/chosen": -1.425999641418457, |
|
"logits/rejected": -0.7339752912521362, |
|
"logps/chosen": -357.58050537109375, |
|
"logps/rejected": -459.1241149902344, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.811273217201233, |
|
"rewards/margins": 0.9072386622428894, |
|
"rewards/rejected": -2.7185120582580566, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.3192881444647998, |
|
"grad_norm": 53.88796332939181, |
|
"learning_rate": 4.302153783289736e-07, |
|
"logits/chosen": -1.348676085472107, |
|
"logits/rejected": -0.9413366317749023, |
|
"logps/chosen": -354.7875061035156, |
|
"logps/rejected": -408.4991149902344, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5594650506973267, |
|
"rewards/margins": 0.6488285064697266, |
|
"rewards/rejected": -2.2082934379577637, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 52.50744960152232, |
|
"learning_rate": 4.2701883298576124e-07, |
|
"logits/chosen": -1.720445990562439, |
|
"logits/rejected": -1.290917992591858, |
|
"logps/chosen": -375.7829284667969, |
|
"logps/rejected": -434.217529296875, |
|
"loss": 0.5917, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.6508047580718994, |
|
"rewards/margins": 0.5466897487640381, |
|
"rewards/rejected": -2.1974947452545166, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32975660821774405, |
|
"grad_norm": 44.222624438240096, |
|
"learning_rate": 4.237631647903115e-07, |
|
"logits/chosen": -1.4673423767089844, |
|
"logits/rejected": -0.8496716618537903, |
|
"logps/chosen": -377.01470947265625, |
|
"logps/rejected": -444.0126953125, |
|
"loss": 0.5623, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.9182418584823608, |
|
"rewards/margins": 0.6217330694198608, |
|
"rewards/rejected": -2.539975166320801, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 56.74613947557357, |
|
"learning_rate": 4.204494611093548e-07, |
|
"logits/chosen": -1.0175368785858154, |
|
"logits/rejected": -1.0307013988494873, |
|
"logps/chosen": -352.26666259765625, |
|
"logps/rejected": -444.2943420410156, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.003497362136841, |
|
"rewards/margins": 0.6112686395645142, |
|
"rewards/rejected": -2.6147658824920654, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"grad_norm": 56.35474765839724, |
|
"learning_rate": 4.1707882869300235e-07, |
|
"logits/chosen": -1.4247758388519287, |
|
"logits/rejected": -0.8727043271064758, |
|
"logps/chosen": -357.51800537109375, |
|
"logps/rejected": -452.87677001953125, |
|
"loss": 0.526, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8759056329727173, |
|
"rewards/margins": 0.9082478284835815, |
|
"rewards/rejected": -2.784153699874878, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 51.29897875137078, |
|
"learning_rate": 4.136523933051005e-07, |
|
"logits/chosen": -1.3449084758758545, |
|
"logits/rejected": -1.0442228317260742, |
|
"logps/chosen": -372.7139587402344, |
|
"logps/rejected": -517.5125732421875, |
|
"loss": 0.5216, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0670166015625, |
|
"rewards/margins": 1.0036545991897583, |
|
"rewards/rejected": -3.0706706047058105, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35069353572363254, |
|
"grad_norm": 53.649661826724454, |
|
"learning_rate": 4.101712993472348e-07, |
|
"logits/chosen": -1.49883234500885, |
|
"logits/rejected": -1.0886845588684082, |
|
"logps/chosen": -370.93817138671875, |
|
"logps/rejected": -424.97064208984375, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9939110279083252, |
|
"rewards/margins": 0.5497623085975647, |
|
"rewards/rejected": -2.543673276901245, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 44.18426608864023, |
|
"learning_rate": 4.066367094765091e-07, |
|
"logits/chosen": -1.3670942783355713, |
|
"logits/rejected": -1.2187774181365967, |
|
"logps/chosen": -374.8455810546875, |
|
"logps/rejected": -434.95867919921875, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9276262521743774, |
|
"rewards/margins": 0.5358133912086487, |
|
"rewards/rejected": -2.463439464569092, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.3611619994765768, |
|
"grad_norm": 52.799712890225976, |
|
"learning_rate": 4.0304980421722766e-07, |
|
"logits/chosen": -1.5443974733352661, |
|
"logits/rejected": -0.7440096139907837, |
|
"logps/chosen": -365.8990173339844, |
|
"logps/rejected": -443.71368408203125, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7589327096939087, |
|
"rewards/margins": 0.8254102468490601, |
|
"rewards/rejected": -2.5843429565429688, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 51.1426156745725, |
|
"learning_rate": 3.994117815666095e-07, |
|
"logits/chosen": -1.7846711874008179, |
|
"logits/rejected": -0.7315829992294312, |
|
"logps/chosen": -387.86376953125, |
|
"logps/rejected": -462.34503173828125, |
|
"loss": 0.5186, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9715665578842163, |
|
"rewards/margins": 0.752856969833374, |
|
"rewards/rejected": -2.724423885345459, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3716304632295211, |
|
"grad_norm": 62.36681697005891, |
|
"learning_rate": 3.957238565946671e-07, |
|
"logits/chosen": -1.2870978116989136, |
|
"logits/rejected": -0.7070925235748291, |
|
"logps/chosen": -403.86444091796875, |
|
"logps/rejected": -515.5906982421875, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2011804580688477, |
|
"rewards/margins": 1.022341012954712, |
|
"rewards/rejected": -3.2235217094421387, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 57.328832948777865, |
|
"learning_rate": 3.9198726103838306e-07, |
|
"logits/chosen": -1.2161122560501099, |
|
"logits/rejected": -0.788101315498352, |
|
"logps/chosen": -387.68768310546875, |
|
"logps/rejected": -519.7088012695312, |
|
"loss": 0.5069, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9085991382598877, |
|
"rewards/margins": 1.1666193008422852, |
|
"rewards/rejected": -3.0752182006835938, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.38209892698246534, |
|
"grad_norm": 68.04530741097648, |
|
"learning_rate": 3.8820324289031946e-07, |
|
"logits/chosen": -1.356323003768921, |
|
"logits/rejected": -1.176628828048706, |
|
"logps/chosen": -346.1966247558594, |
|
"logps/rejected": -447.4593811035156, |
|
"loss": 0.5455, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8683630228042603, |
|
"rewards/margins": 0.8649228811264038, |
|
"rewards/rejected": -2.733285903930664, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 39.4868442275961, |
|
"learning_rate": 3.84373065981799e-07, |
|
"logits/chosen": -1.5190155506134033, |
|
"logits/rejected": -1.4792649745941162, |
|
"logps/chosen": -346.9692077636719, |
|
"logps/rejected": -456.06634521484375, |
|
"loss": 0.4934, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7277275323867798, |
|
"rewards/margins": 0.8931981921195984, |
|
"rewards/rejected": -2.6209259033203125, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"grad_norm": 63.695440464895746, |
|
"learning_rate": 3.8049800956079545e-07, |
|
"logits/chosen": -1.434788703918457, |
|
"logits/rejected": -1.0202864408493042, |
|
"logps/chosen": -409.87701416015625, |
|
"logps/rejected": -506.40179443359375, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0980966091156006, |
|
"rewards/margins": 0.8423215746879578, |
|
"rewards/rejected": -2.940418243408203, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 74.4694029881303, |
|
"learning_rate": 3.7657936786467525e-07, |
|
"logits/chosen": -1.2294615507125854, |
|
"logits/rejected": -0.37707972526550293, |
|
"logps/chosen": -439.51025390625, |
|
"logps/rejected": -541.850830078125, |
|
"loss": 0.5554, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.648507595062256, |
|
"rewards/margins": 1.04387366771698, |
|
"rewards/rejected": -3.6923813819885254, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40303585448835383, |
|
"grad_norm": 71.73793938729527, |
|
"learning_rate": 3.7261844968793226e-07, |
|
"logits/chosen": -1.8120813369750977, |
|
"logits/rejected": -1.2725474834442139, |
|
"logps/chosen": -390.5841979980469, |
|
"logps/rejected": -455.8675231933594, |
|
"loss": 0.5758, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8539800643920898, |
|
"rewards/margins": 0.7287551164627075, |
|
"rewards/rejected": -2.582735300064087, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 39.07736477725613, |
|
"learning_rate": 3.6861657794506187e-07, |
|
"logits/chosen": -1.9753271341323853, |
|
"logits/rejected": -1.181422233581543, |
|
"logps/chosen": -334.88177490234375, |
|
"logps/rejected": -404.42864990234375, |
|
"loss": 0.5208, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.412209153175354, |
|
"rewards/margins": 0.8599991798400879, |
|
"rewards/rejected": -2.2722084522247314, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.4135043182412981, |
|
"grad_norm": 43.71312206302924, |
|
"learning_rate": 3.6457508922871777e-07, |
|
"logits/chosen": -1.889682412147522, |
|
"logits/rejected": -1.6067005395889282, |
|
"logps/chosen": -302.65997314453125, |
|
"logps/rejected": -356.685302734375, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3029696941375732, |
|
"rewards/margins": 0.5454732775688171, |
|
"rewards/rejected": -1.8484432697296143, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 55.95387505858846, |
|
"learning_rate": 3.6049533336330084e-07, |
|
"logits/chosen": -1.7215135097503662, |
|
"logits/rejected": -1.557579755783081, |
|
"logps/chosen": -316.13299560546875, |
|
"logps/rejected": -418.8580017089844, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.4686353206634521, |
|
"rewards/margins": 0.7653969526290894, |
|
"rewards/rejected": -2.234032154083252, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4239727819942423, |
|
"grad_norm": 55.856275598760135, |
|
"learning_rate": 3.56378672954129e-07, |
|
"logits/chosen": -1.6764100790023804, |
|
"logits/rejected": -1.513831377029419, |
|
"logps/chosen": -329.6163635253906, |
|
"logps/rejected": -439.32666015625, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.5339328050613403, |
|
"rewards/margins": 0.7833306789398193, |
|
"rewards/rejected": -2.317263126373291, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 30.497969544668866, |
|
"learning_rate": 3.5222648293233803e-07, |
|
"logits/chosen": -1.4442280530929565, |
|
"logits/rejected": -1.609626054763794, |
|
"logps/chosen": -373.2001037597656, |
|
"logps/rejected": -476.081787109375, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8375556468963623, |
|
"rewards/margins": 0.7855282425880432, |
|
"rewards/rejected": -2.6230838298797607, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4344412457471866, |
|
"grad_norm": 61.68802213789782, |
|
"learning_rate": 3.480401500956657e-07, |
|
"logits/chosen": -1.523938536643982, |
|
"logits/rejected": -1.2494620084762573, |
|
"logps/chosen": -351.20379638671875, |
|
"logps/rejected": -404.7106018066406, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7532094717025757, |
|
"rewards/margins": 0.5370227098464966, |
|
"rewards/rejected": -2.2902321815490723, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 60.810924331570874, |
|
"learning_rate": 3.438210726452724e-07, |
|
"logits/chosen": -1.4685875177383423, |
|
"logits/rejected": -0.2091219425201416, |
|
"logps/chosen": -392.4021911621094, |
|
"logps/rejected": -455.44482421875, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.634267807006836, |
|
"rewards/margins": 0.8823213577270508, |
|
"rewards/rejected": -2.5165891647338867, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"grad_norm": 96.20779655004723, |
|
"learning_rate": 3.395706597187538e-07, |
|
"logits/chosen": -1.657881498336792, |
|
"logits/rejected": -1.0053375959396362, |
|
"logps/chosen": -368.91693115234375, |
|
"logps/rejected": -448.57928466796875, |
|
"loss": 0.5498, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6729650497436523, |
|
"rewards/margins": 0.8312269449234009, |
|
"rewards/rejected": -2.5041918754577637, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 42.3904090971244, |
|
"learning_rate": 3.3529033091949986e-07, |
|
"logits/chosen": -1.2889825105667114, |
|
"logits/rejected": -1.143614411354065, |
|
"logps/chosen": -324.1905212402344, |
|
"logps/rejected": -454.4544372558594, |
|
"loss": 0.5259, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6198065280914307, |
|
"rewards/margins": 0.8226202130317688, |
|
"rewards/rejected": -2.4424266815185547, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4553781732530751, |
|
"grad_norm": 58.49911093913357, |
|
"learning_rate": 3.309815158425591e-07, |
|
"logits/chosen": -1.6366761922836304, |
|
"logits/rejected": -1.365851640701294, |
|
"logps/chosen": -323.0370178222656, |
|
"logps/rejected": -400.08746337890625, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.317899465560913, |
|
"rewards/margins": 0.7919186353683472, |
|
"rewards/rejected": -2.1098179817199707, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 51.25947355514214, |
|
"learning_rate": 3.2664565359716536e-07, |
|
"logits/chosen": -1.6139739751815796, |
|
"logits/rejected": -1.3585468530654907, |
|
"logps/chosen": -331.5315856933594, |
|
"logps/rejected": -412.1536560058594, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.580944299697876, |
|
"rewards/margins": 0.688396155834198, |
|
"rewards/rejected": -2.269340753555298, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46584663700601936, |
|
"grad_norm": 46.47446781450997, |
|
"learning_rate": 3.222841923260869e-07, |
|
"logits/chosen": -1.56208074092865, |
|
"logits/rejected": -1.448430061340332, |
|
"logps/chosen": -320.86212158203125, |
|
"logps/rejected": -417.16864013671875, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.4867175817489624, |
|
"rewards/margins": 0.8568013310432434, |
|
"rewards/rejected": -2.3435187339782715, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 48.96651105399355, |
|
"learning_rate": 3.1789858872195887e-07, |
|
"logits/chosen": -1.9190874099731445, |
|
"logits/rejected": -1.5447766780853271, |
|
"logps/chosen": -371.22479248046875, |
|
"logps/rejected": -460.0589904785156, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.576744794845581, |
|
"rewards/margins": 0.9420223236083984, |
|
"rewards/rejected": -2.5187671184539795, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4763151007589636, |
|
"grad_norm": 52.14197578469426, |
|
"learning_rate": 3.1349030754075937e-07, |
|
"logits/chosen": -1.5406408309936523, |
|
"logits/rejected": -1.2619167566299438, |
|
"logps/chosen": -352.7300109863281, |
|
"logps/rejected": -432.47027587890625, |
|
"loss": 0.5743, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.5983822345733643, |
|
"rewards/margins": 0.7195128202438354, |
|
"rewards/rejected": -2.3178951740264893, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 40.54722236321293, |
|
"learning_rate": 3.090608211125931e-07, |
|
"logits/chosen": -1.6173185110092163, |
|
"logits/rejected": -1.3097602128982544, |
|
"logps/chosen": -330.4248046875, |
|
"logps/rejected": -428.1268005371094, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.4730908870697021, |
|
"rewards/margins": 0.8812786340713501, |
|
"rewards/rejected": -2.354369640350342, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48678356451190785, |
|
"grad_norm": 72.50233844212927, |
|
"learning_rate": 3.0461160884994487e-07, |
|
"logits/chosen": -1.5237468481063843, |
|
"logits/rejected": -1.1552776098251343, |
|
"logps/chosen": -362.2909240722656, |
|
"logps/rejected": -435.55718994140625, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6922056674957275, |
|
"rewards/margins": 0.7138403654098511, |
|
"rewards/rejected": -2.4060463905334473, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 43.01313334263546, |
|
"learning_rate": 3.001441567535681e-07, |
|
"logits/chosen": -1.4215238094329834, |
|
"logits/rejected": -0.5556894540786743, |
|
"logps/chosen": -325.53814697265625, |
|
"logps/rejected": -438.48602294921875, |
|
"loss": 0.4948, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.556456208229065, |
|
"rewards/margins": 1.1343437433242798, |
|
"rewards/rejected": -2.6907999515533447, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"grad_norm": 53.39321127260436, |
|
"learning_rate": 2.956599569161724e-07, |
|
"logits/chosen": -1.5301493406295776, |
|
"logits/rejected": -0.6811811327934265, |
|
"logps/chosen": -359.93988037109375, |
|
"logps/rejected": -444.7823181152344, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.7924388647079468, |
|
"rewards/margins": 0.9484294652938843, |
|
"rewards/rejected": -2.740868330001831, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 54.86988246933394, |
|
"learning_rate": 2.91160507024077e-07, |
|
"logits/chosen": -1.3366544246673584, |
|
"logits/rejected": -0.8686436414718628, |
|
"logps/chosen": -387.9107360839844, |
|
"logps/rejected": -483.66546630859375, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.0935609340667725, |
|
"rewards/margins": 0.9254472851753235, |
|
"rewards/rejected": -3.019008159637451, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5077204920177963, |
|
"grad_norm": 40.90699971350834, |
|
"learning_rate": 2.866473098569953e-07, |
|
"logits/chosen": -1.3852179050445557, |
|
"logits/rejected": -1.096529483795166, |
|
"logps/chosen": -406.48736572265625, |
|
"logps/rejected": -503.5655822753906, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -2.1852097511291504, |
|
"rewards/margins": 0.9200779795646667, |
|
"rewards/rejected": -3.105287551879883, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 58.095451640836856, |
|
"learning_rate": 2.8212187278611905e-07, |
|
"logits/chosen": -1.689866065979004, |
|
"logits/rejected": -1.0209802389144897, |
|
"logps/chosen": -391.45513916015625, |
|
"logps/rejected": -496.5826110839844, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8062623739242554, |
|
"rewards/margins": 0.9972459077835083, |
|
"rewards/rejected": -2.8035082817077637, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5181889557707406, |
|
"grad_norm": 59.318676556528416, |
|
"learning_rate": 2.775857072706684e-07, |
|
"logits/chosen": -1.3584972620010376, |
|
"logits/rejected": -1.3198730945587158, |
|
"logps/chosen": -378.9326171875, |
|
"logps/rejected": -501.0203552246094, |
|
"loss": 0.5252, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.007432460784912, |
|
"rewards/margins": 0.9733474850654602, |
|
"rewards/rejected": -2.9807801246643066, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 71.87530393074267, |
|
"learning_rate": 2.7304032835307667e-07, |
|
"logits/chosen": -1.4289054870605469, |
|
"logits/rejected": -1.1081018447875977, |
|
"logps/chosen": -383.5538024902344, |
|
"logps/rejected": -441.691650390625, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.0912938117980957, |
|
"rewards/margins": 0.5263177156448364, |
|
"rewards/rejected": -2.6176114082336426, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"eval_logits/chosen": -0.6161556839942932, |
|
"eval_logits/rejected": -0.05726008489727974, |
|
"eval_logps/chosen": -376.86456298828125, |
|
"eval_logps/rejected": -464.8497009277344, |
|
"eval_loss": 0.5098932385444641, |
|
"eval_rewards/accuracies": 0.7160000205039978, |
|
"eval_rewards/chosen": -1.9184350967407227, |
|
"eval_rewards/margins": 0.838153064250946, |
|
"eval_rewards/rejected": -2.7565884590148926, |
|
"eval_runtime": 316.7389, |
|
"eval_samples_per_second": 6.314, |
|
"eval_steps_per_second": 0.789, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.528657419523685, |
|
"grad_norm": 34.830235400374406, |
|
"learning_rate": 2.6848725415297884e-07, |
|
"logits/chosen": -1.6549265384674072, |
|
"logits/rejected": -1.5296745300292969, |
|
"logps/chosen": -367.93243408203125, |
|
"logps/rejected": -463.02581787109375, |
|
"loss": 0.5217, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.8079150915145874, |
|
"rewards/margins": 0.8082486391067505, |
|
"rewards/rejected": -2.616163730621338, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 46.15714344837178, |
|
"learning_rate": 2.6392800536017183e-07, |
|
"logits/chosen": -1.3246982097625732, |
|
"logits/rejected": -1.457916498184204, |
|
"logps/chosen": -347.6453552246094, |
|
"logps/rejected": -456.356689453125, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.6062390804290771, |
|
"rewards/margins": 0.8783868551254272, |
|
"rewards/rejected": -2.484626054763794, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5391258832766291, |
|
"grad_norm": 40.29007295543721, |
|
"learning_rate": 2.59364104726716e-07, |
|
"logits/chosen": -1.495118498802185, |
|
"logits/rejected": -1.118583083152771, |
|
"logps/chosen": -361.8853454589844, |
|
"logps/rejected": -440.2359924316406, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8378098011016846, |
|
"rewards/margins": 0.7821410894393921, |
|
"rewards/rejected": -2.619950771331787, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 41.4595455203303, |
|
"learning_rate": 2.547970765583491e-07, |
|
"logits/chosen": -1.4750334024429321, |
|
"logits/rejected": -1.2206333875656128, |
|
"logps/chosen": -345.68243408203125, |
|
"logps/rejected": -457.96417236328125, |
|
"loss": 0.5369, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8482234477996826, |
|
"rewards/margins": 0.8760908246040344, |
|
"rewards/rejected": -2.7243142127990723, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"grad_norm": 69.76278032549452, |
|
"learning_rate": 2.502284462053799e-07, |
|
"logits/chosen": -1.6379356384277344, |
|
"logits/rejected": -1.1788249015808105, |
|
"logps/chosen": -364.0078125, |
|
"logps/rejected": -483.25750732421875, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7867047786712646, |
|
"rewards/margins": 0.9141599535942078, |
|
"rewards/rejected": -2.700864553451538, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 43.473418630985485, |
|
"learning_rate": 2.4565973955323374e-07, |
|
"logits/chosen": -1.9050061702728271, |
|
"logits/rejected": -0.7371460795402527, |
|
"logps/chosen": -347.5364990234375, |
|
"logps/rejected": -419.16436767578125, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.645029067993164, |
|
"rewards/margins": 0.8056646585464478, |
|
"rewards/rejected": -2.4506936073303223, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5600628107825176, |
|
"grad_norm": 44.18467471903625, |
|
"learning_rate": 2.410924825128195e-07, |
|
"logits/chosen": -1.677795648574829, |
|
"logits/rejected": -1.0934385061264038, |
|
"logps/chosen": -336.7928161621094, |
|
"logps/rejected": -414.9443359375, |
|
"loss": 0.5456, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7231500148773193, |
|
"rewards/margins": 0.6935104727745056, |
|
"rewards/rejected": -2.416660785675049, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 43.21905358731484, |
|
"learning_rate": 2.365282005108875e-07, |
|
"logits/chosen": -1.9169113636016846, |
|
"logits/rejected": -1.1039310693740845, |
|
"logps/chosen": -326.3893127441406, |
|
"logps/rejected": -395.2596435546875, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3917843103408813, |
|
"rewards/margins": 0.8932709693908691, |
|
"rewards/rejected": -2.28505539894104, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5705312745354619, |
|
"grad_norm": 57.74038702140702, |
|
"learning_rate": 2.319684179805491e-07, |
|
"logits/chosen": -1.7448060512542725, |
|
"logits/rejected": -0.4081326425075531, |
|
"logps/chosen": -359.01214599609375, |
|
"logps/rejected": -431.38531494140625, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.73810613155365, |
|
"rewards/margins": 0.8466591835021973, |
|
"rewards/rejected": -2.5847649574279785, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 47.91534163404264, |
|
"learning_rate": 2.2741465785212902e-07, |
|
"logits/chosen": -1.360697627067566, |
|
"logits/rejected": -1.1678688526153564, |
|
"logps/chosen": -341.6931457519531, |
|
"logps/rejected": -434.61749267578125, |
|
"loss": 0.5126, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.7018409967422485, |
|
"rewards/margins": 0.6925634741783142, |
|
"rewards/rejected": -2.394404649734497, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5809997382884062, |
|
"grad_norm": 45.37428786887181, |
|
"learning_rate": 2.2286844104451843e-07, |
|
"logits/chosen": -1.6492805480957031, |
|
"logits/rejected": -1.0351827144622803, |
|
"logps/chosen": -381.6548156738281, |
|
"logps/rejected": -453.56561279296875, |
|
"loss": 0.5463, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.7667547464370728, |
|
"rewards/margins": 0.7139405012130737, |
|
"rewards/rejected": -2.4806952476501465, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 55.038838111114046, |
|
"learning_rate": 2.183312859572008e-07, |
|
"logits/chosen": -1.5666944980621338, |
|
"logits/rejected": -1.518080711364746, |
|
"logps/chosen": -349.3604431152344, |
|
"logps/rejected": -461.66033935546875, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.716058373451233, |
|
"rewards/margins": 0.9028706550598145, |
|
"rewards/rejected": -2.618929147720337, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5914682020413504, |
|
"grad_norm": 51.86256999226559, |
|
"learning_rate": 2.138047079631184e-07, |
|
"logits/chosen": -1.71830153465271, |
|
"logits/rejected": -1.3731257915496826, |
|
"logps/chosen": -348.7271423339844, |
|
"logps/rejected": -429.04730224609375, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.4229063987731934, |
|
"rewards/margins": 0.8486738204956055, |
|
"rewards/rejected": -2.271580219268799, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 61.89587232832013, |
|
"learning_rate": 2.0929021890255068e-07, |
|
"logits/chosen": -1.7627557516098022, |
|
"logits/rejected": -0.9542855024337769, |
|
"logps/chosen": -336.31402587890625, |
|
"logps/rejected": -434.7019958496094, |
|
"loss": 0.4759, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.5431474447250366, |
|
"rewards/margins": 1.0138742923736572, |
|
"rewards/rejected": -2.5570216178894043, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"grad_norm": 40.16605369465375, |
|
"learning_rate": 2.0478932657817102e-07, |
|
"logits/chosen": -1.7182365655899048, |
|
"logits/rejected": -1.2019648551940918, |
|
"logps/chosen": -383.36444091796875, |
|
"logps/rejected": -474.964599609375, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7786900997161865, |
|
"rewards/margins": 0.8999784588813782, |
|
"rewards/rejected": -2.67866849899292, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 65.12157906582378, |
|
"learning_rate": 2.0030353425145374e-07, |
|
"logits/chosen": -1.5357849597930908, |
|
"logits/rejected": -0.887830913066864, |
|
"logps/chosen": -351.23516845703125, |
|
"logps/rejected": -427.96795654296875, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.722428560256958, |
|
"rewards/margins": 0.9013321995735168, |
|
"rewards/rejected": -2.62376070022583, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6124051295472389, |
|
"grad_norm": 56.641784845018215, |
|
"learning_rate": 1.9583434014059635e-07, |
|
"logits/chosen": -1.632693886756897, |
|
"logits/rejected": -1.4601401090621948, |
|
"logps/chosen": -347.542236328125, |
|
"logps/rejected": -451.15875244140625, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6873490810394287, |
|
"rewards/margins": 0.8980420827865601, |
|
"rewards/rejected": -2.5853912830352783, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 45.373706821931066, |
|
"learning_rate": 1.9138323692012733e-07, |
|
"logits/chosen": -1.8676795959472656, |
|
"logits/rejected": -1.3623721599578857, |
|
"logps/chosen": -318.84417724609375, |
|
"logps/rejected": -431.2315979003906, |
|
"loss": 0.4796, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.421907901763916, |
|
"rewards/margins": 0.911504864692688, |
|
"rewards/rejected": -2.3334126472473145, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6228735933001832, |
|
"grad_norm": 33.99709051781791, |
|
"learning_rate": 1.8695171122236442e-07, |
|
"logits/chosen": -1.6899328231811523, |
|
"logits/rejected": -1.231277585029602, |
|
"logps/chosen": -341.48797607421875, |
|
"logps/rejected": -474.08343505859375, |
|
"loss": 0.4651, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5275802612304688, |
|
"rewards/margins": 1.2658531665802002, |
|
"rewards/rejected": -2.793433427810669, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 59.87117531330853, |
|
"learning_rate": 1.8254124314089223e-07, |
|
"logits/chosen": -1.715743064880371, |
|
"logits/rejected": -1.2481369972229004, |
|
"logps/chosen": -357.3684997558594, |
|
"logps/rejected": -465.179443359375, |
|
"loss": 0.4937, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7480823993682861, |
|
"rewards/margins": 0.9352186322212219, |
|
"rewards/rejected": -2.6833012104034424, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6333420570531274, |
|
"grad_norm": 47.15786058024146, |
|
"learning_rate": 1.7815330573622205e-07, |
|
"logits/chosen": -1.7806997299194336, |
|
"logits/rejected": -1.0503987073898315, |
|
"logps/chosen": -366.83319091796875, |
|
"logps/rejected": -447.2417907714844, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.8660345077514648, |
|
"rewards/margins": 0.938534140586853, |
|
"rewards/rejected": -2.8045687675476074, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 59.66066585989714, |
|
"learning_rate": 1.7378936454380274e-07, |
|
"logits/chosen": -1.665154218673706, |
|
"logits/rejected": -1.2627506256103516, |
|
"logps/chosen": -375.3360290527344, |
|
"logps/rejected": -451.8492736816406, |
|
"loss": 0.5141, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.9925143718719482, |
|
"rewards/margins": 0.8351390957832336, |
|
"rewards/rejected": -2.827653408050537, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6438105208060717, |
|
"grad_norm": 41.626090508354544, |
|
"learning_rate": 1.694508770845427e-07, |
|
"logits/chosen": -1.8555021286010742, |
|
"logits/rejected": -1.1900012493133545, |
|
"logps/chosen": -381.927490234375, |
|
"logps/rejected": -469.22900390625, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8637516498565674, |
|
"rewards/margins": 0.8393726348876953, |
|
"rewards/rejected": -2.7031240463256836, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 46.575209941519326, |
|
"learning_rate": 1.651392923780105e-07, |
|
"logits/chosen": -1.7130018472671509, |
|
"logits/rejected": -1.2077395915985107, |
|
"logps/chosen": -395.5782775878906, |
|
"logps/rejected": -510.3377380371094, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0861849784851074, |
|
"rewards/margins": 1.0165139436721802, |
|
"rewards/rejected": -3.102698802947998, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"grad_norm": 37.25325816311792, |
|
"learning_rate": 1.6085605045847367e-07, |
|
"logits/chosen": -1.3418257236480713, |
|
"logits/rejected": -0.8439585566520691, |
|
"logps/chosen": -367.1421813964844, |
|
"logps/rejected": -509.10919189453125, |
|
"loss": 0.4888, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.0315098762512207, |
|
"rewards/margins": 1.2717547416687012, |
|
"rewards/rejected": -3.303264617919922, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 56.54421548734596, |
|
"learning_rate": 1.5660258189393944e-07, |
|
"logits/chosen": -1.6082178354263306, |
|
"logits/rejected": -0.8709881901741028, |
|
"logps/chosen": -418.84100341796875, |
|
"logps/rejected": -491.2918395996094, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1445069313049316, |
|
"rewards/margins": 0.9897298812866211, |
|
"rewards/rejected": -3.1342368125915527, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6647474483119602, |
|
"grad_norm": 58.872726779228444, |
|
"learning_rate": 1.5238030730835577e-07, |
|
"logits/chosen": -1.4633188247680664, |
|
"logits/rejected": -1.0070345401763916, |
|
"logps/chosen": -417.1031188964844, |
|
"logps/rejected": -512.1041870117188, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1558361053466797, |
|
"rewards/margins": 1.0906795263290405, |
|
"rewards/rejected": -3.246515989303589, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 81.68062237565607, |
|
"learning_rate": 1.4819063690713564e-07, |
|
"logits/chosen": -1.230272650718689, |
|
"logits/rejected": -1.1420851945877075, |
|
"logps/chosen": -400.37384033203125, |
|
"logps/rejected": -501.90203857421875, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.241870403289795, |
|
"rewards/margins": 0.9584578275680542, |
|
"rewards/rejected": -3.2003283500671387, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6752159120649045, |
|
"grad_norm": 43.4045980317099, |
|
"learning_rate": 1.4403497000615883e-07, |
|
"logits/chosen": -1.5301976203918457, |
|
"logits/rejected": -1.0589085817337036, |
|
"logps/chosen": -406.1276550292969, |
|
"logps/rejected": -488.4261779785156, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.061319589614868, |
|
"rewards/margins": 0.8899547457695007, |
|
"rewards/rejected": -2.9512743949890137, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 89.57498346480389, |
|
"learning_rate": 1.3991469456441272e-07, |
|
"logits/chosen": -1.6822435855865479, |
|
"logits/rejected": -1.314344882965088, |
|
"logps/chosen": -366.78118896484375, |
|
"logps/rejected": -476.983642578125, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9514825344085693, |
|
"rewards/margins": 0.9372948408126831, |
|
"rewards/rejected": -2.888777256011963, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6856843758178487, |
|
"grad_norm": 79.22800819366738, |
|
"learning_rate": 1.358311867204244e-07, |
|
"logits/chosen": -1.3850901126861572, |
|
"logits/rejected": -1.0397329330444336, |
|
"logps/chosen": -410.082763671875, |
|
"logps/rejected": -514.4329833984375, |
|
"loss": 0.5204, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1403348445892334, |
|
"rewards/margins": 0.8943174481391907, |
|
"rewards/rejected": -3.0346522331237793, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 38.40836601776419, |
|
"learning_rate": 1.3178581033264216e-07, |
|
"logits/chosen": -1.7234236001968384, |
|
"logits/rejected": -1.0658862590789795, |
|
"logps/chosen": -384.4350280761719, |
|
"logps/rejected": -510.3180236816406, |
|
"loss": 0.4734, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.981745719909668, |
|
"rewards/margins": 1.077263355255127, |
|
"rewards/rejected": -3.059009075164795, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.696152839570793, |
|
"grad_norm": 64.64336518776386, |
|
"learning_rate": 1.2777991652391757e-07, |
|
"logits/chosen": -1.686440110206604, |
|
"logits/rejected": -1.139702558517456, |
|
"logps/chosen": -364.24102783203125, |
|
"logps/rejected": -508.68310546875, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8615505695343018, |
|
"rewards/margins": 1.3014369010925293, |
|
"rewards/rejected": -3.162987232208252, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 54.064436812936954, |
|
"learning_rate": 1.2381484323024178e-07, |
|
"logits/chosen": -1.5776959657669067, |
|
"logits/rejected": -1.3716309070587158, |
|
"logps/chosen": -381.18890380859375, |
|
"logps/rejected": -485.0956115722656, |
|
"loss": 0.4748, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.0563852787017822, |
|
"rewards/margins": 0.8801882863044739, |
|
"rewards/rejected": -2.9365735054016113, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"grad_norm": 71.26268968336478, |
|
"learning_rate": 1.1989191475388516e-07, |
|
"logits/chosen": -1.5418208837509155, |
|
"logits/rejected": -1.3163645267486572, |
|
"logps/chosen": -389.85479736328125, |
|
"logps/rejected": -469.4613342285156, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9432039260864258, |
|
"rewards/margins": 0.8496206402778625, |
|
"rewards/rejected": -2.7928245067596436, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 47.16713356681667, |
|
"learning_rate": 1.1601244132109179e-07, |
|
"logits/chosen": -1.547049641609192, |
|
"logits/rejected": -1.1506636142730713, |
|
"logps/chosen": -378.78546142578125, |
|
"logps/rejected": -475.17340087890625, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9611685276031494, |
|
"rewards/margins": 0.8856902122497559, |
|
"rewards/rejected": -2.8468587398529053, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7170897670766815, |
|
"grad_norm": 57.49724175574713, |
|
"learning_rate": 1.1217771864447395e-07, |
|
"logits/chosen": -1.5181536674499512, |
|
"logits/rejected": -1.0610802173614502, |
|
"logps/chosen": -382.1415710449219, |
|
"logps/rejected": -458.43310546875, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9159305095672607, |
|
"rewards/margins": 0.8874963521957397, |
|
"rewards/rejected": -2.803427219390869, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 48.43604137330991, |
|
"learning_rate": 1.0838902749025499e-07, |
|
"logits/chosen": -1.5803115367889404, |
|
"logits/rejected": -0.9147381782531738, |
|
"logps/chosen": -364.0269470214844, |
|
"logps/rejected": -485.8170471191406, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7391821146011353, |
|
"rewards/margins": 1.2611137628555298, |
|
"rewards/rejected": -3.000295639038086, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7275582308296258, |
|
"grad_norm": 46.02896607743019, |
|
"learning_rate": 1.0464763325050358e-07, |
|
"logits/chosen": -1.389105200767517, |
|
"logits/rejected": -1.2149088382720947, |
|
"logps/chosen": -363.1921081542969, |
|
"logps/rejected": -481.0618591308594, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8936526775360107, |
|
"rewards/margins": 1.019033670425415, |
|
"rewards/rejected": -2.912686586380005, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 47.49065837586764, |
|
"learning_rate": 1.0095478552050346e-07, |
|
"logits/chosen": -1.6921743154525757, |
|
"logits/rejected": -1.1181484460830688, |
|
"logps/chosen": -369.4226989746094, |
|
"logps/rejected": -469.757568359375, |
|
"loss": 0.5117, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.803931474685669, |
|
"rewards/margins": 0.8779823184013367, |
|
"rewards/rejected": -2.6819140911102295, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.73802669458257, |
|
"grad_norm": 51.70967904748882, |
|
"learning_rate": 9.731171768139806e-08, |
|
"logits/chosen": -1.5017741918563843, |
|
"logits/rejected": -0.8476959466934204, |
|
"logps/chosen": -366.8438415527344, |
|
"logps/rejected": -477.4512634277344, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8564783334732056, |
|
"rewards/margins": 1.0491094589233398, |
|
"rewards/rejected": -2.905587673187256, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 38.12827677711878, |
|
"learning_rate": 9.37196464882522e-08, |
|
"logits/chosen": -1.7628695964813232, |
|
"logits/rejected": -0.9748938679695129, |
|
"logps/chosen": -358.43377685546875, |
|
"logps/rejected": -426.41839599609375, |
|
"loss": 0.5369, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.729318380355835, |
|
"rewards/margins": 0.7932037711143494, |
|
"rewards/rejected": -2.522522211074829, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7484951583355143, |
|
"grad_norm": 54.47585201435841, |
|
"learning_rate": 9.017977166366444e-08, |
|
"logits/chosen": -1.7095372676849365, |
|
"logits/rejected": -1.3382246494293213, |
|
"logps/chosen": -386.80419921875, |
|
"logps/rejected": -485.66217041015625, |
|
"loss": 0.4759, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.8498786687850952, |
|
"rewards/margins": 0.956847071647644, |
|
"rewards/rejected": -2.8067257404327393, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 55.994908017980144, |
|
"learning_rate": 8.669327549707095e-08, |
|
"logits/chosen": -1.8079534769058228, |
|
"logits/rejected": -1.6061460971832275, |
|
"logps/chosen": -372.0968017578125, |
|
"logps/rejected": -449.3272399902344, |
|
"loss": 0.4883, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8120940923690796, |
|
"rewards/margins": 0.7753244042396545, |
|
"rewards/rejected": -2.587418794631958, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"grad_norm": 44.71580454221332, |
|
"learning_rate": 8.326132244986931e-08, |
|
"logits/chosen": -1.501529335975647, |
|
"logits/rejected": -1.167120337486267, |
|
"logps/chosen": -371.7989807128906, |
|
"logps/rejected": -466.7962951660156, |
|
"loss": 0.4955, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9070402383804321, |
|
"rewards/margins": 0.8023300170898438, |
|
"rewards/rejected": -2.7093701362609863, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 47.918962412956276, |
|
"learning_rate": 7.988505876649862e-08, |
|
"logits/chosen": -1.6484168767929077, |
|
"logits/rejected": -1.049037218093872, |
|
"logps/chosen": -387.1507263183594, |
|
"logps/rejected": -471.23455810546875, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.9400913715362549, |
|
"rewards/margins": 0.9767180681228638, |
|
"rewards/rejected": -2.916809320449829, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7694320858414028, |
|
"grad_norm": 44.681178148336286, |
|
"learning_rate": 7.656561209160248e-08, |
|
"logits/chosen": -1.5934867858886719, |
|
"logits/rejected": -0.9533841013908386, |
|
"logps/chosen": -415.989013671875, |
|
"logps/rejected": -503.83038330078125, |
|
"loss": 0.5124, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1351356506347656, |
|
"rewards/margins": 1.072280764579773, |
|
"rewards/rejected": -3.2074170112609863, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 42.957667070339845, |
|
"learning_rate": 7.330409109340562e-08, |
|
"logits/chosen": -1.6417922973632812, |
|
"logits/rejected": -0.9782828092575073, |
|
"logps/chosen": -368.1243591308594, |
|
"logps/rejected": -457.6202697753906, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.979144811630249, |
|
"rewards/margins": 0.9068864583969116, |
|
"rewards/rejected": -2.88603138923645, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7799005495943471, |
|
"grad_norm": 50.32703939295202, |
|
"learning_rate": 7.010158509342681e-08, |
|
"logits/chosen": -1.492157220840454, |
|
"logits/rejected": -1.0186296701431274, |
|
"logps/chosen": -405.69769287109375, |
|
"logps/rejected": -512.1472778320312, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2098875045776367, |
|
"rewards/margins": 0.9893035888671875, |
|
"rewards/rejected": -3.199190855026245, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 46.95925974979752, |
|
"learning_rate": 6.695916370265527e-08, |
|
"logits/chosen": -1.5276159048080444, |
|
"logits/rejected": -1.1085699796676636, |
|
"logps/chosen": -377.8671875, |
|
"logps/rejected": -545.2662353515625, |
|
"loss": 0.4707, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.0651357173919678, |
|
"rewards/margins": 1.2801060676574707, |
|
"rewards/rejected": -3.3452422618865967, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"eval_logits/chosen": -0.6702331304550171, |
|
"eval_logits/rejected": -0.05479540303349495, |
|
"eval_logps/chosen": -403.77130126953125, |
|
"eval_logps/rejected": -503.5488586425781, |
|
"eval_loss": 0.500029981136322, |
|
"eval_rewards/accuracies": 0.7319999933242798, |
|
"eval_rewards/chosen": -2.1875030994415283, |
|
"eval_rewards/margins": 0.9560768604278564, |
|
"eval_rewards/rejected": -3.1435797214508057, |
|
"eval_runtime": 316.8429, |
|
"eval_samples_per_second": 6.312, |
|
"eval_steps_per_second": 0.789, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7903690133472913, |
|
"grad_norm": 54.6800615563418, |
|
"learning_rate": 6.387787646430853e-08, |
|
"logits/chosen": -1.6765468120574951, |
|
"logits/rejected": -0.902447521686554, |
|
"logps/chosen": -411.84197998046875, |
|
"logps/rejected": -520.2639770507812, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.209589719772339, |
|
"rewards/margins": 1.2369428873062134, |
|
"rewards/rejected": -3.4465324878692627, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 39.76345106053052, |
|
"learning_rate": 6.0858752503294e-08, |
|
"logits/chosen": -1.9017261266708374, |
|
"logits/rejected": -1.027804970741272, |
|
"logps/chosen": -375.6845397949219, |
|
"logps/rejected": -452.0986328125, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9550809860229492, |
|
"rewards/margins": 0.9305828213691711, |
|
"rewards/rejected": -2.8856637477874756, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8008374771002356, |
|
"grad_norm": 58.54818603530255, |
|
"learning_rate": 5.7902800182489385e-08, |
|
"logits/chosen": -1.340591311454773, |
|
"logits/rejected": -0.8900511860847473, |
|
"logps/chosen": -382.7643737792969, |
|
"logps/rejected": -484.4600524902344, |
|
"loss": 0.4566, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.102140188217163, |
|
"rewards/margins": 0.9162777066230774, |
|
"rewards/rejected": -3.018418073654175, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 31.245998478258013, |
|
"learning_rate": 5.5011006765957604e-08, |
|
"logits/chosen": -1.618656873703003, |
|
"logits/rejected": -1.1801456212997437, |
|
"logps/chosen": -419.6280822753906, |
|
"logps/rejected": -499.01751708984375, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.230376720428467, |
|
"rewards/margins": 0.7931150197982788, |
|
"rewards/rejected": -3.023491621017456, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"grad_norm": 51.33396122501131, |
|
"learning_rate": 5.218433808920883e-08, |
|
"logits/chosen": -1.60371994972229, |
|
"logits/rejected": -1.1156429052352905, |
|
"logps/chosen": -398.92974853515625, |
|
"logps/rejected": -472.16973876953125, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.029679775238037, |
|
"rewards/margins": 0.9714105725288391, |
|
"rewards/rejected": -3.0010905265808105, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 39.552994793370026, |
|
"learning_rate": 4.942373823661927e-08, |
|
"logits/chosen": -1.6282466650009155, |
|
"logits/rejected": -0.91362464427948, |
|
"logps/chosen": -435.72216796875, |
|
"logps/rejected": -547.359619140625, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.306370496749878, |
|
"rewards/margins": 0.9918804168701172, |
|
"rewards/rejected": -3.298250913619995, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.821774404606124, |
|
"grad_norm": 60.08870167062165, |
|
"learning_rate": 4.6730129226114354e-08, |
|
"logits/chosen": -1.6371328830718994, |
|
"logits/rejected": -0.6333679556846619, |
|
"logps/chosen": -426.95703125, |
|
"logps/rejected": -538.3595581054688, |
|
"loss": 0.5232, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2964425086975098, |
|
"rewards/margins": 1.1182937622070312, |
|
"rewards/rejected": -3.414735794067383, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 46.54218001272997, |
|
"learning_rate": 4.41044107012227e-08, |
|
"logits/chosen": -1.46670401096344, |
|
"logits/rejected": -1.1195374727249146, |
|
"logps/chosen": -394.80078125, |
|
"logps/rejected": -524.2930297851562, |
|
"loss": 0.5174, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0618937015533447, |
|
"rewards/margins": 1.137542486190796, |
|
"rewards/rejected": -3.1994361877441406, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8322428683590684, |
|
"grad_norm": 61.54813763490487, |
|
"learning_rate": 4.1547459630601966e-08, |
|
"logits/chosen": -1.5916258096694946, |
|
"logits/rejected": -0.7485260963439941, |
|
"logps/chosen": -393.9290771484375, |
|
"logps/rejected": -469.08026123046875, |
|
"loss": 0.488, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -2.228323459625244, |
|
"rewards/margins": 0.8031214475631714, |
|
"rewards/rejected": -3.031445026397705, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 47.454976444510464, |
|
"learning_rate": 3.9060130015138857e-08, |
|
"logits/chosen": -1.6673399209976196, |
|
"logits/rejected": -0.9725383520126343, |
|
"logps/chosen": -391.20428466796875, |
|
"logps/rejected": -521.3951416015625, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1082262992858887, |
|
"rewards/margins": 1.1958465576171875, |
|
"rewards/rejected": -3.304072856903076, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8427113321120125, |
|
"grad_norm": 49.49623357781679, |
|
"learning_rate": 3.664325260271953e-08, |
|
"logits/chosen": -1.785679578781128, |
|
"logits/rejected": -1.3486297130584717, |
|
"logps/chosen": -425.0037536621094, |
|
"logps/rejected": -499.3848571777344, |
|
"loss": 0.5221, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9705803394317627, |
|
"rewards/margins": 1.0100176334381104, |
|
"rewards/rejected": -2.980597972869873, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 46.41346156870081, |
|
"learning_rate": 3.429763461076676e-08, |
|
"logits/chosen": -1.5286333560943604, |
|
"logits/rejected": -1.219725251197815, |
|
"logps/chosen": -410.5511169433594, |
|
"logps/rejected": -490.86669921875, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.219921112060547, |
|
"rewards/margins": 0.7538778185844421, |
|
"rewards/rejected": -2.973799228668213, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8531797958649568, |
|
"grad_norm": 48.04705054674233, |
|
"learning_rate": 3.202405945663555e-08, |
|
"logits/chosen": -1.2301688194274902, |
|
"logits/rejected": -1.1187329292297363, |
|
"logps/chosen": -398.3143615722656, |
|
"logps/rejected": -471.55010986328125, |
|
"loss": 0.5216, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.1741843223571777, |
|
"rewards/margins": 0.7370268702507019, |
|
"rewards/rejected": -2.9112114906311035, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 55.340731067768736, |
|
"learning_rate": 2.9823286495958556e-08, |
|
"logits/chosen": -1.6326100826263428, |
|
"logits/rejected": -1.1603213548660278, |
|
"logps/chosen": -397.24005126953125, |
|
"logps/rejected": -491.3506774902344, |
|
"loss": 0.5211, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9936778545379639, |
|
"rewards/margins": 1.0326869487762451, |
|
"rewards/rejected": -3.026364803314209, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"grad_norm": 50.97109608180561, |
|
"learning_rate": 2.769605076902695e-08, |
|
"logits/chosen": -1.5886093378067017, |
|
"logits/rejected": -1.497037649154663, |
|
"logps/chosen": -364.107666015625, |
|
"logps/rejected": -491.516357421875, |
|
"loss": 0.5201, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.014535903930664, |
|
"rewards/margins": 0.9813510179519653, |
|
"rewards/rejected": -2.9958865642547607, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 79.38139421221125, |
|
"learning_rate": 2.5643062755293403e-08, |
|
"logits/chosen": -1.504066824913025, |
|
"logits/rejected": -0.8931864500045776, |
|
"logps/chosen": -391.62713623046875, |
|
"logps/rejected": -493.92425537109375, |
|
"loss": 0.5236, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1034412384033203, |
|
"rewards/margins": 0.948072075843811, |
|
"rewards/rejected": -3.051513195037842, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8741167233708453, |
|
"grad_norm": 51.56700786640102, |
|
"learning_rate": 2.366500813607733e-08, |
|
"logits/chosen": -1.5938122272491455, |
|
"logits/rejected": -1.2632464170455933, |
|
"logps/chosen": -385.2617492675781, |
|
"logps/rejected": -489.09228515625, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.0015676021575928, |
|
"rewards/margins": 1.0208126306533813, |
|
"rewards/rejected": -3.0223803520202637, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 57.01702831334661, |
|
"learning_rate": 2.176254756555329e-08, |
|
"logits/chosen": -1.5943069458007812, |
|
"logits/rejected": -1.2856522798538208, |
|
"logps/chosen": -391.60736083984375, |
|
"logps/rejected": -528.451171875, |
|
"loss": 0.4412, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9151815176010132, |
|
"rewards/margins": 1.0721461772918701, |
|
"rewards/rejected": -2.987327814102173, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8845851871237895, |
|
"grad_norm": 52.04207661692601, |
|
"learning_rate": 1.9936316450097468e-08, |
|
"logits/chosen": -1.6583569049835205, |
|
"logits/rejected": -1.2720321416854858, |
|
"logps/chosen": -375.07989501953125, |
|
"logps/rejected": -463.97015380859375, |
|
"loss": 0.48, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0725209712982178, |
|
"rewards/margins": 0.813088595867157, |
|
"rewards/rejected": -2.8856096267700195, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 56.72817214823504, |
|
"learning_rate": 1.8186924736067477e-08, |
|
"logits/chosen": -1.6529877185821533, |
|
"logits/rejected": -1.2598158121109009, |
|
"logps/chosen": -395.845703125, |
|
"logps/rejected": -497.45574951171875, |
|
"loss": 0.4753, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.0222179889678955, |
|
"rewards/margins": 1.0427659749984741, |
|
"rewards/rejected": -3.06498384475708, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8950536508767338, |
|
"grad_norm": 60.40350237801338, |
|
"learning_rate": 1.651495670608488e-08, |
|
"logits/chosen": -1.6280845403671265, |
|
"logits/rejected": -1.0049400329589844, |
|
"logps/chosen": -408.4595642089844, |
|
"logps/rejected": -528.3615112304688, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1130669116973877, |
|
"rewards/margins": 1.1445512771606445, |
|
"rewards/rejected": -3.2576184272766113, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 42.82234513435798, |
|
"learning_rate": 1.4920970783889737e-08, |
|
"logits/chosen": -1.784874677658081, |
|
"logits/rejected": -1.022477388381958, |
|
"logps/chosen": -403.08294677734375, |
|
"logps/rejected": -465.8329162597656, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.028446912765503, |
|
"rewards/margins": 0.7884195446968079, |
|
"rewards/rejected": -2.816866636276245, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9055221146296781, |
|
"grad_norm": 67.83919792587483, |
|
"learning_rate": 1.340549934783164e-08, |
|
"logits/chosen": -1.6942017078399658, |
|
"logits/rejected": -1.3202614784240723, |
|
"logps/chosen": -391.9894104003906, |
|
"logps/rejected": -499.2957458496094, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.1304984092712402, |
|
"rewards/margins": 0.9605563879013062, |
|
"rewards/rejected": -3.091054916381836, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 38.38762358526929, |
|
"learning_rate": 1.1969048553059608e-08, |
|
"logits/chosen": -1.5242860317230225, |
|
"logits/rejected": -1.0694557428359985, |
|
"logps/chosen": -385.64447021484375, |
|
"logps/rejected": -532.7508544921875, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.149869441986084, |
|
"rewards/margins": 1.1629979610443115, |
|
"rewards/rejected": -3.3128669261932373, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"grad_norm": 47.57788484474261, |
|
"learning_rate": 1.06120981624703e-08, |
|
"logits/chosen": -1.8843555450439453, |
|
"logits/rejected": -0.6692565083503723, |
|
"logps/chosen": -416.1067810058594, |
|
"logps/rejected": -449.87103271484375, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.03684401512146, |
|
"rewards/margins": 0.8309102058410645, |
|
"rewards/rejected": -2.8677542209625244, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 51.79734307723889, |
|
"learning_rate": 9.335101386471284e-09, |
|
"logits/chosen": -1.769464135169983, |
|
"logits/rejected": -1.5171881914138794, |
|
"logps/chosen": -366.7747497558594, |
|
"logps/rejected": -517.2105712890625, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -1.8451915979385376, |
|
"rewards/margins": 1.2242134809494019, |
|
"rewards/rejected": -3.0694050788879395, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9264590421355666, |
|
"grad_norm": 59.243332805566055, |
|
"learning_rate": 8.138484731612273e-09, |
|
"logits/chosen": -1.610945701599121, |
|
"logits/rejected": -0.8597782850265503, |
|
"logps/chosen": -412.3548278808594, |
|
"logps/rejected": -507.70050048828125, |
|
"loss": 0.4844, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.1251347064971924, |
|
"rewards/margins": 1.0798755884170532, |
|
"rewards/rejected": -3.205010175704956, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 35.23329301018955, |
|
"learning_rate": 7.0226478581355e-09, |
|
"logits/chosen": -1.5735182762145996, |
|
"logits/rejected": -1.0823094844818115, |
|
"logps/chosen": -382.31378173828125, |
|
"logps/rejected": -459.026611328125, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.1322665214538574, |
|
"rewards/margins": 0.7317706346511841, |
|
"rewards/rejected": -2.86403751373291, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9369275058885108, |
|
"grad_norm": 39.12208203761793, |
|
"learning_rate": 5.987963446492383e-09, |
|
"logits/chosen": -1.6163190603256226, |
|
"logits/rejected": -1.3726081848144531, |
|
"logps/chosen": -368.1934509277344, |
|
"logps/rejected": -484.42620849609375, |
|
"loss": 0.4783, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.939570426940918, |
|
"rewards/margins": 0.9698532819747925, |
|
"rewards/rejected": -2.909423828125, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 45.32578662406385, |
|
"learning_rate": 5.0347770728713935e-09, |
|
"logits/chosen": -1.5549806356430054, |
|
"logits/rejected": -0.9020807147026062, |
|
"logps/chosen": -382.2792053222656, |
|
"logps/rejected": -480.25714111328125, |
|
"loss": 0.4825, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.1894912719726562, |
|
"rewards/margins": 0.8568480610847473, |
|
"rewards/rejected": -3.046339273452759, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9473959696414551, |
|
"grad_norm": 51.50392990407485, |
|
"learning_rate": 4.1634070937782424e-09, |
|
"logits/chosen": -1.563454270362854, |
|
"logits/rejected": -1.4620710611343384, |
|
"logps/chosen": -374.35662841796875, |
|
"logps/rejected": -459.2840881347656, |
|
"loss": 0.4949, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0908713340759277, |
|
"rewards/margins": 0.8073171377182007, |
|
"rewards/rejected": -2.898188829421997, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 52.57515906506144, |
|
"learning_rate": 3.3741445397075797e-09, |
|
"logits/chosen": -1.5654033422470093, |
|
"logits/rejected": -0.9612579345703125, |
|
"logps/chosen": -403.11767578125, |
|
"logps/rejected": -518.7571411132812, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.0279488563537598, |
|
"rewards/margins": 1.2528560161590576, |
|
"rewards/rejected": -3.2808051109313965, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9578644333943994, |
|
"grad_norm": 80.78348799446286, |
|
"learning_rate": 2.667253017941018e-09, |
|
"logits/chosen": -1.5542982816696167, |
|
"logits/rejected": -1.2357369661331177, |
|
"logps/chosen": -414.04571533203125, |
|
"logps/rejected": -515.9486083984375, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.095529079437256, |
|
"rewards/margins": 0.8272086977958679, |
|
"rewards/rejected": -2.9227375984191895, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 60.481486866817654, |
|
"learning_rate": 2.0429686245045097e-09, |
|
"logits/chosen": -1.7054321765899658, |
|
"logits/rejected": -0.9457400441169739, |
|
"logps/chosen": -383.46612548828125, |
|
"logps/rejected": -498.7964782714844, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.065452814102173, |
|
"rewards/margins": 1.179323434829712, |
|
"rewards/rejected": -3.244776487350464, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"grad_norm": 42.47290413747811, |
|
"learning_rate": 1.5014998653141708e-09, |
|
"logits/chosen": -1.4126255512237549, |
|
"logits/rejected": -1.1500160694122314, |
|
"logps/chosen": -424.77337646484375, |
|
"logps/rejected": -545.006591796875, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.2447969913482666, |
|
"rewards/margins": 1.013723611831665, |
|
"rewards/rejected": -3.2585208415985107, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 55.24460892129673, |
|
"learning_rate": 1.0430275865371263e-09, |
|
"logits/chosen": -1.460265874862671, |
|
"logits/rejected": -0.5893803238868713, |
|
"logps/chosen": -364.1796569824219, |
|
"logps/rejected": -458.278564453125, |
|
"loss": 0.4837, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.0408921241760254, |
|
"rewards/margins": 1.031553864479065, |
|
"rewards/rejected": -3.0724456310272217, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9788013609002879, |
|
"grad_norm": 78.57983765704049, |
|
"learning_rate": 6.677049141901314e-10, |
|
"logits/chosen": -1.6051515340805054, |
|
"logits/rejected": -0.9829438924789429, |
|
"logps/chosen": -398.05267333984375, |
|
"logps/rejected": -477.8648376464844, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.0092108249664307, |
|
"rewards/margins": 0.8877508044242859, |
|
"rewards/rejected": -2.8969614505767822, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 37.16308130632785, |
|
"learning_rate": 3.7565720299687077e-10, |
|
"logits/chosen": -1.8912632465362549, |
|
"logits/rejected": -0.9422454833984375, |
|
"logps/chosen": -400.77197265625, |
|
"logps/rejected": -511.6893005371094, |
|
"loss": 0.4815, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.160900115966797, |
|
"rewards/margins": 1.268087387084961, |
|
"rewards/rejected": -3.428987503051758, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9892698246532321, |
|
"grad_norm": 54.378639302860876, |
|
"learning_rate": 1.6698199452053197e-10, |
|
"logits/chosen": -1.7411140203475952, |
|
"logits/rejected": -1.1444064378738403, |
|
"logps/chosen": -405.96466064453125, |
|
"logps/rejected": -516.4884643554688, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.084994077682495, |
|
"rewards/margins": 0.9552907943725586, |
|
"rewards/rejected": -3.0402846336364746, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 55.60143338165864, |
|
"learning_rate": 4.174898458556009e-11, |
|
"logits/chosen": -1.7632405757904053, |
|
"logits/rejected": -1.2880979776382446, |
|
"logps/chosen": -400.0120544433594, |
|
"logps/rejected": -445.19036865234375, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.067795515060425, |
|
"rewards/margins": 0.6980506777763367, |
|
"rewards/rejected": -2.7658464908599854, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"grad_norm": 50.31539314937899, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.4952467679977417, |
|
"logits/rejected": -0.9173043966293335, |
|
"logps/chosen": -421.25, |
|
"logps/rejected": -543.969482421875, |
|
"loss": 0.4891, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.100468635559082, |
|
"rewards/margins": 1.1773890256881714, |
|
"rewards/rejected": -3.277857542037964, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"step": 1910, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5374546366836388, |
|
"train_runtime": 38043.1849, |
|
"train_samples_per_second": 1.607, |
|
"train_steps_per_second": 0.05 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|