{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984301412872841,
  "eval_steps": 500,
  "global_step": 477,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0020931449502878076,
      "grad_norm": 9.686688861295458,
      "learning_rate": 1.0416666666666666e-08,
      "logits/chosen": -2.7144877910614014,
      "logits/rejected": -2.6456987857818604,
      "logps/chosen": -307.5989990234375,
      "logps/rejected": -353.280517578125,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.020931449502878074,
      "grad_norm": 8.910692539294239,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -2.464939832687378,
      "logits/rejected": -2.4213643074035645,
      "logps/chosen": -299.6020812988281,
      "logps/rejected": -269.6002502441406,
      "loss": 0.6932,
      "rewards/accuracies": 0.4027777910232544,
      "rewards/chosen": -0.0003792633942794055,
      "rewards/margins": -0.000643148785457015,
      "rewards/rejected": 0.00026388533296994865,
      "step": 10
    },
    {
      "epoch": 0.04186289900575615,
      "grad_norm": 7.998647811770768,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -2.5575523376464844,
      "logits/rejected": -2.4738364219665527,
      "logps/chosen": -300.9610290527344,
      "logps/rejected": -276.1901550292969,
      "loss": 0.6924,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.003328660037368536,
      "rewards/margins": 0.0013169770827516913,
      "rewards/rejected": 0.0020116830710321665,
      "step": 20
    },
    {
      "epoch": 0.06279434850863422,
      "grad_norm": 8.21834105065241,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.532283306121826,
      "logits/rejected": -2.4365248680114746,
      "logps/chosen": -283.2300109863281,
      "logps/rejected": -240.62158203125,
      "loss": 0.6892,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": 0.024711979553103447,
      "rewards/margins": 0.00843757577240467,
      "rewards/rejected": 0.016274403780698776,
      "step": 30
    },
    {
      "epoch": 0.0837257980115123,
      "grad_norm": 8.23700971574819,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -2.4243757724761963,
      "logits/rejected": -2.3694825172424316,
      "logps/chosen": -255.69534301757812,
      "logps/rejected": -253.03646850585938,
      "loss": 0.6803,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.051530830562114716,
      "rewards/margins": 0.027234002947807312,
      "rewards/rejected": 0.024296827614307404,
      "step": 40
    },
    {
      "epoch": 0.10465724751439037,
      "grad_norm": 8.490471908136655,
      "learning_rate": 4.999731868769026e-07,
      "logits/chosen": -2.4612836837768555,
      "logits/rejected": -2.366600275039673,
      "logps/chosen": -266.41839599609375,
      "logps/rejected": -266.593017578125,
      "loss": 0.6671,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.05303896218538284,
      "rewards/margins": 0.05404331535100937,
      "rewards/rejected": -0.0010043552611023188,
      "step": 50
    },
    {
      "epoch": 0.12558869701726844,
      "grad_norm": 11.448244491004864,
      "learning_rate": 4.990353313429303e-07,
      "logits/chosen": -2.5066006183624268,
      "logits/rejected": -2.4322540760040283,
      "logps/chosen": -249.62960815429688,
      "logps/rejected": -252.5321807861328,
      "loss": 0.6449,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.015312853269279003,
      "rewards/margins": 0.12173323333263397,
      "rewards/rejected": -0.13704606890678406,
      "step": 60
    },
    {
      "epoch": 0.14652014652014653,
      "grad_norm": 12.222640628288522,
      "learning_rate": 4.967625656594781e-07,
      "logits/chosen": -2.4458515644073486,
      "logits/rejected": -2.4075169563293457,
      "logps/chosen": -304.5162048339844,
      "logps/rejected": -312.8282775878906,
      "loss": 0.6171,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.16042451560497284,
      "rewards/margins": 0.20845112204551697,
      "rewards/rejected": -0.368875652551651,
      "step": 70
    },
    {
      "epoch": 0.1674515960230246,
      "grad_norm": 13.355609497034044,
      "learning_rate": 4.93167072587771e-07,
      "logits/chosen": -2.5683891773223877,
      "logits/rejected": -2.4449405670166016,
      "logps/chosen": -335.45166015625,
      "logps/rejected": -294.0103454589844,
      "loss": 0.6038,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.12419579923152924,
      "rewards/margins": 0.2938229441642761,
      "rewards/rejected": -0.41801875829696655,
      "step": 80
    },
    {
      "epoch": 0.18838304552590268,
      "grad_norm": 16.831516529666843,
      "learning_rate": 4.882681251368548e-07,
      "logits/chosen": -2.5068366527557373,
      "logits/rejected": -2.4493064880371094,
      "logps/chosen": -292.50592041015625,
      "logps/rejected": -333.88446044921875,
      "loss": 0.5754,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.45342904329299927,
      "rewards/margins": 0.37515172362327576,
      "rewards/rejected": -0.8285807371139526,
      "step": 90
    },
    {
      "epoch": 0.20931449502878074,
      "grad_norm": 14.56937938894116,
      "learning_rate": 4.820919832540181e-07,
      "logits/chosen": -2.426504373550415,
      "logits/rejected": -2.359724521636963,
      "logps/chosen": -338.78021240234375,
      "logps/rejected": -367.85589599609375,
      "loss": 0.5745,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.47890257835388184,
      "rewards/margins": 0.4808070659637451,
      "rewards/rejected": -0.959709644317627,
      "step": 100
    },
    {
      "epoch": 0.2302459445316588,
      "grad_norm": 20.002857020935622,
      "learning_rate": 4.7467175306295647e-07,
      "logits/chosen": -2.346848726272583,
      "logits/rejected": -2.258775472640991,
      "logps/chosen": -342.98046875,
      "logps/rejected": -371.9133605957031,
      "loss": 0.5817,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.5712723731994629,
      "rewards/margins": 0.45406538248062134,
      "rewards/rejected": -1.02533757686615,
      "step": 110
    },
    {
      "epoch": 0.25117739403453687,
      "grad_norm": 20.278919360241712,
      "learning_rate": 4.6604720940421207e-07,
      "logits/chosen": -2.105543613433838,
      "logits/rejected": -2.0793662071228027,
      "logps/chosen": -311.2692565917969,
      "logps/rejected": -360.46136474609375,
      "loss": 0.5614,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.4692445397377014,
      "rewards/margins": 0.5657081604003906,
      "rewards/rejected": -1.0349526405334473,
      "step": 120
    },
    {
      "epoch": 0.272108843537415,
      "grad_norm": 15.445489902432293,
      "learning_rate": 4.5626458262912735e-07,
      "logits/chosen": -2.0017788410186768,
      "logits/rejected": -1.9253467321395874,
      "logps/chosen": -359.1274719238281,
      "logps/rejected": -392.7835998535156,
      "loss": 0.5493,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.8196575045585632,
      "rewards/margins": 0.4658297002315521,
      "rewards/rejected": -1.285487174987793,
      "step": 130
    },
    {
      "epoch": 0.29304029304029305,
      "grad_norm": 23.527632580214693,
      "learning_rate": 4.453763107901675e-07,
      "logits/chosen": -2.021439790725708,
      "logits/rejected": -1.9034312963485718,
      "logps/chosen": -354.5030517578125,
      "logps/rejected": -370.05926513671875,
      "loss": 0.5458,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -0.44141221046447754,
      "rewards/margins": 0.6134065389633179,
      "rewards/rejected": -1.0548187494277954,
      "step": 140
    },
    {
      "epoch": 0.3139717425431711,
      "grad_norm": 20.969211081154093,
      "learning_rate": 4.3344075855595097e-07,
      "logits/chosen": -1.808932900428772,
      "logits/rejected": -1.6896326541900635,
      "logps/chosen": -336.74200439453125,
      "logps/rejected": -351.64471435546875,
      "loss": 0.5563,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.6119992733001709,
      "rewards/margins": 0.46776071190834045,
      "rewards/rejected": -1.0797598361968994,
      "step": 150
    },
    {
      "epoch": 0.3349031920460492,
      "grad_norm": 16.819029625662488,
      "learning_rate": 4.2052190435769554e-07,
      "logits/chosen": -1.6512082815170288,
      "logits/rejected": -1.4201633930206299,
      "logps/chosen": -328.9945373535156,
      "logps/rejected": -369.72216796875,
      "loss": 0.554,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.5794623494148254,
      "rewards/margins": 0.6330465078353882,
      "rewards/rejected": -1.2125087976455688,
      "step": 160
    },
    {
      "epoch": 0.35583464154892724,
      "grad_norm": 21.61579841438679,
      "learning_rate": 4.0668899744407567e-07,
      "logits/chosen": -1.3276041746139526,
      "logits/rejected": -1.2030179500579834,
      "logps/chosen": -326.64642333984375,
      "logps/rejected": -355.4222106933594,
      "loss": 0.5522,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.7154276967048645,
      "rewards/margins": 0.5184686779975891,
      "rewards/rejected": -1.2338963747024536,
      "step": 170
    },
    {
      "epoch": 0.37676609105180536,
      "grad_norm": 23.330504007228544,
      "learning_rate": 3.920161866827889e-07,
      "logits/chosen": -1.4597452878952026,
      "logits/rejected": -1.335136890411377,
      "logps/chosen": -339.64739990234375,
      "logps/rejected": -370.96832275390625,
      "loss": 0.512,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.7294771075248718,
      "rewards/margins": 0.5706949234008789,
      "rewards/rejected": -1.3001720905303955,
      "step": 180
    },
    {
      "epoch": 0.3976975405546834,
      "grad_norm": 20.457726835632624,
      "learning_rate": 3.765821230985757e-07,
      "logits/chosen": -1.406663179397583,
      "logits/rejected": -1.3718976974487305,
      "logps/chosen": -341.9677429199219,
      "logps/rejected": -390.94769287109375,
      "loss": 0.5365,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.8832741975784302,
      "rewards/margins": 0.5474223494529724,
      "rewards/rejected": -1.4306964874267578,
      "step": 190
    },
    {
      "epoch": 0.4186289900575615,
      "grad_norm": 45.24370174913431,
      "learning_rate": 3.604695382782159e-07,
      "logits/chosen": -1.4117847681045532,
      "logits/rejected": -1.3832786083221436,
      "logps/chosen": -319.09490966796875,
      "logps/rejected": -393.9739074707031,
      "loss": 0.5489,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.7460958957672119,
      "rewards/margins": 0.5401177406311035,
      "rewards/rejected": -1.286213755607605,
      "step": 200
    },
    {
      "epoch": 0.43956043956043955,
      "grad_norm": 21.103872725510552,
      "learning_rate": 3.4376480090239047e-07,
      "logits/chosen": -1.6419851779937744,
      "logits/rejected": -1.4086859226226807,
      "logps/chosen": -358.5858154296875,
      "logps/rejected": -359.4571228027344,
      "loss": 0.5454,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.5668836832046509,
      "rewards/margins": 0.5663009285926819,
      "rewards/rejected": -1.1331846714019775,
      "step": 210
    },
    {
      "epoch": 0.4604918890633176,
      "grad_norm": 25.972160767293996,
      "learning_rate": 3.265574537815398e-07,
      "logits/chosen": -1.3075270652770996,
      "logits/rejected": -1.233934998512268,
      "logps/chosen": -309.5500793457031,
      "logps/rejected": -373.98223876953125,
      "loss": 0.5313,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.7272627353668213,
      "rewards/margins": 0.6237983703613281,
      "rewards/rejected": -1.351061224937439,
      "step": 220
    },
    {
      "epoch": 0.48142333856619574,
      "grad_norm": 24.174538019841428,
      "learning_rate": 3.0893973387735683e-07,
      "logits/chosen": -1.3718620538711548,
      "logits/rejected": -1.1529138088226318,
      "logps/chosen": -327.77801513671875,
      "logps/rejected": -397.93426513671875,
      "loss": 0.5333,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -0.7222844958305359,
      "rewards/margins": 0.8920642733573914,
      "rewards/rejected": -1.6143487691879272,
      "step": 230
    },
    {
      "epoch": 0.5023547880690737,
      "grad_norm": 23.429710656248453,
      "learning_rate": 2.910060778827554e-07,
      "logits/chosen": -1.2215020656585693,
      "logits/rejected": -1.0080658197402954,
      "logps/chosen": -363.2150573730469,
      "logps/rejected": -406.48297119140625,
      "loss": 0.5051,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.8090720176696777,
      "rewards/margins": 0.6936684846878052,
      "rewards/rejected": -1.5027403831481934,
      "step": 240
    },
    {
      "epoch": 0.5232862375719518,
      "grad_norm": 21.330001354953637,
      "learning_rate": 2.7285261601056697e-07,
      "logits/chosen": -1.1898741722106934,
      "logits/rejected": -0.9075394868850708,
      "logps/chosen": -360.69354248046875,
      "logps/rejected": -408.86773681640625,
      "loss": 0.5165,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": -0.8794037103652954,
      "rewards/margins": 0.8506172299385071,
      "rewards/rejected": -1.7300208806991577,
      "step": 250
    },
    {
      "epoch": 0.54421768707483,
      "grad_norm": 27.49314240274799,
      "learning_rate": 2.5457665670441937e-07,
      "logits/chosen": -1.4158910512924194,
      "logits/rejected": -1.3504579067230225,
      "logps/chosen": -341.8540344238281,
      "logps/rejected": -389.331787109375,
      "loss": 0.521,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.8279495239257812,
      "rewards/margins": 0.6607829332351685,
      "rewards/rejected": -1.4887326955795288,
      "step": 260
    },
    {
      "epoch": 0.565149136577708,
      "grad_norm": 23.74859118455523,
      "learning_rate": 2.3627616503391812e-07,
      "logits/chosen": -1.4965870380401611,
      "logits/rejected": -1.3825056552886963,
      "logps/chosen": -359.75787353515625,
      "logps/rejected": -420.7306213378906,
      "loss": 0.5209,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.7315458655357361,
      "rewards/margins": 0.853925347328186,
      "rewards/rejected": -1.5854711532592773,
      "step": 270
    },
    {
      "epoch": 0.5860805860805861,
      "grad_norm": 26.82116388601379,
      "learning_rate": 2.1804923757009882e-07,
      "logits/chosen": -1.37690007686615,
      "logits/rejected": -1.2291252613067627,
      "logps/chosen": -337.71820068359375,
      "logps/rejected": -374.30706787109375,
      "loss": 0.5319,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.7968527674674988,
      "rewards/margins": 0.657835066318512,
      "rewards/rejected": -1.4546878337860107,
      "step": 280
    },
    {
      "epoch": 0.6070120355834642,
      "grad_norm": 22.28753319365226,
      "learning_rate": 1.9999357655598891e-07,
      "logits/chosen": -1.51715087890625,
      "logits/rejected": -1.3852065801620483,
      "logps/chosen": -324.92340087890625,
      "logps/rejected": -390.6435241699219,
      "loss": 0.5218,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.8132198452949524,
      "rewards/margins": 0.717974066734314,
      "rewards/rejected": -1.5311939716339111,
      "step": 290
    },
    {
      "epoch": 0.6279434850863422,
      "grad_norm": 26.95577178592052,
      "learning_rate": 1.8220596619089573e-07,
      "logits/chosen": -1.4670779705047607,
      "logits/rejected": -1.297649621963501,
      "logps/chosen": -403.04913330078125,
      "logps/rejected": -428.7928161621094,
      "loss": 0.5019,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.8886338472366333,
      "rewards/margins": 0.7050989270210266,
      "rewards/rejected": -1.5937328338623047,
      "step": 300
    },
    {
      "epoch": 0.6488749345892203,
      "grad_norm": 24.13195804406926,
      "learning_rate": 1.647817538357072e-07,
      "logits/chosen": -1.4607212543487549,
      "logits/rejected": -1.3081916570663452,
      "logps/chosen": -383.5493469238281,
      "logps/rejected": -418.50146484375,
      "loss": 0.5041,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.8673523664474487,
      "rewards/margins": 0.8634257316589355,
      "rewards/rejected": -1.7307780981063843,
      "step": 310
    },
    {
      "epoch": 0.6698063840920984,
      "grad_norm": 27.24325859713386,
      "learning_rate": 1.478143389201113e-07,
      "logits/chosen": -1.382027506828308,
      "logits/rejected": -1.1992334127426147,
      "logps/chosen": -328.2751770019531,
      "logps/rejected": -391.3571472167969,
      "loss": 0.4938,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.7451919317245483,
      "rewards/margins": 0.9388906359672546,
      "rewards/rejected": -1.6840826272964478,
      "step": 320
    },
    {
      "epoch": 0.6907378335949764,
      "grad_norm": 27.216778454048367,
      "learning_rate": 1.3139467229135998e-07,
      "logits/chosen": -1.4924046993255615,
      "logits/rejected": -1.4578219652175903,
      "logps/chosen": -335.9745788574219,
      "logps/rejected": -404.7709655761719,
      "loss": 0.5135,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.7176355123519897,
      "rewards/margins": 0.7227510213851929,
      "rewards/rejected": -1.4403865337371826,
      "step": 330
    },
    {
      "epoch": 0.7116692830978545,
      "grad_norm": 28.17952431515267,
      "learning_rate": 1.1561076868822755e-07,
      "logits/chosen": -1.3802480697631836,
      "logits/rejected": -1.277980089187622,
      "logps/chosen": -361.58062744140625,
      "logps/rejected": -420.6078186035156,
      "loss": 0.507,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.7701286673545837,
      "rewards/margins": 0.8021227717399597,
      "rewards/rejected": -1.572251558303833,
      "step": 340
    },
    {
      "epoch": 0.7326007326007326,
      "grad_norm": 22.12939500514488,
      "learning_rate": 1.0054723495346482e-07,
      "logits/chosen": -1.3996691703796387,
      "logits/rejected": -1.2072045803070068,
      "logps/chosen": -330.85455322265625,
      "logps/rejected": -388.41876220703125,
      "loss": 0.478,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": -0.7518431544303894,
      "rewards/margins": 0.8218573331832886,
      "rewards/rejected": -1.5737005472183228,
      "step": 350
    },
    {
      "epoch": 0.7535321821036107,
      "grad_norm": 19.503424794677688,
      "learning_rate": 8.628481651367875e-08,
      "logits/chosen": -1.2794829607009888,
      "logits/rejected": -1.063652753829956,
      "logps/chosen": -383.0862121582031,
      "logps/rejected": -429.30682373046875,
      "loss": 0.5281,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.8063914179801941,
      "rewards/margins": 0.8900289535522461,
      "rewards/rejected": -1.696420431137085,
      "step": 360
    },
    {
      "epoch": 0.7744636316064888,
      "grad_norm": 26.907332636471214,
      "learning_rate": 7.289996455765748e-08,
      "logits/chosen": -1.2005360126495361,
      "logits/rejected": -1.0235365629196167,
      "logps/chosen": -340.9770202636719,
      "logps/rejected": -393.8902587890625,
      "loss": 0.5067,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.7764495015144348,
      "rewards/margins": 0.8340295553207397,
      "rewards/rejected": -1.6104789972305298,
      "step": 370
    },
    {
      "epoch": 0.7953950811093669,
      "grad_norm": 29.80515491184858,
      "learning_rate": 6.046442623320145e-08,
      "logits/chosen": -0.9497655034065247,
      "logits/rejected": -0.878097414970398,
      "logps/chosen": -346.8731994628906,
      "logps/rejected": -461.755126953125,
      "loss": 0.5097,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.0337427854537964,
      "rewards/margins": 0.9731820225715637,
      "rewards/rejected": -2.006925106048584,
      "step": 380
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 24.841605469293125,
      "learning_rate": 4.904486005914027e-08,
      "logits/chosen": -1.209467887878418,
      "logits/rejected": -1.031672716140747,
      "logps/chosen": -429.25628662109375,
      "logps/rejected": -471.14501953125,
      "loss": 0.4938,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.9466843605041504,
      "rewards/margins": 0.7874827980995178,
      "rewards/rejected": -1.7341673374176025,
      "step": 390
    },
    {
      "epoch": 0.837257980115123,
      "grad_norm": 30.69453014687722,
      "learning_rate": 3.8702478614051345e-08,
      "logits/chosen": -1.067712664604187,
      "logits/rejected": -0.8794199228286743,
      "logps/chosen": -337.4797058105469,
      "logps/rejected": -408.54315185546875,
      "loss": 0.5135,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.8585729598999023,
      "rewards/margins": 0.849725604057312,
      "rewards/rejected": -1.7082984447479248,
      "step": 400
    },
    {
      "epoch": 0.858189429618001,
      "grad_norm": 59.96090300586583,
      "learning_rate": 2.9492720416985e-08,
      "logits/chosen": -1.267378807067871,
      "logits/rejected": -1.0611450672149658,
      "logps/chosen": -386.88427734375,
      "logps/rejected": -436.865234375,
      "loss": 0.5168,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.9016349911689758,
      "rewards/margins": 0.8452868461608887,
      "rewards/rejected": -1.7469215393066406,
      "step": 410
    },
    {
      "epoch": 0.8791208791208791,
      "grad_norm": 28.101172543050826,
      "learning_rate": 2.1464952759020856e-08,
      "logits/chosen": -1.0743638277053833,
      "logits/rejected": -0.9579585790634155,
      "logps/chosen": -352.7454528808594,
      "logps/rejected": -439.5741271972656,
      "loss": 0.5045,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -1.0042593479156494,
      "rewards/margins": 0.8400126695632935,
      "rewards/rejected": -1.8442720174789429,
      "step": 420
    },
    {
      "epoch": 0.9000523286237572,
      "grad_norm": 25.50390969555994,
      "learning_rate": 1.4662207078575684e-08,
      "logits/chosen": -1.0834466218948364,
      "logits/rejected": -0.8814092874526978,
      "logps/chosen": -379.11468505859375,
      "logps/rejected": -440.3974609375,
      "loss": 0.4848,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.9338687062263489,
      "rewards/margins": 0.8329343795776367,
      "rewards/rejected": -1.7668030261993408,
      "step": 430
    },
    {
      "epoch": 0.9209837781266352,
      "grad_norm": 24.719240527877194,
      "learning_rate": 9.12094829893642e-09,
      "logits/chosen": -1.2495243549346924,
      "logits/rejected": -1.0566151142120361,
      "logps/chosen": -346.22894287109375,
      "logps/rejected": -386.73565673828125,
      "loss": 0.4991,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.8856858015060425,
      "rewards/margins": 0.7673971056938171,
      "rewards/rejected": -1.653083086013794,
      "step": 440
    },
    {
      "epoch": 0.9419152276295133,
      "grad_norm": 27.94864423555306,
      "learning_rate": 4.8708793644441086e-09,
      "logits/chosen": -0.9761242866516113,
      "logits/rejected": -0.8453343510627747,
      "logps/chosen": -365.3951416015625,
      "logps/rejected": -443.78924560546875,
      "loss": 0.5018,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.9703672528266907,
      "rewards/margins": 0.9006422162055969,
      "rewards/rejected": -1.8710094690322876,
      "step": 450
    },
    {
      "epoch": 0.9628466771323915,
      "grad_norm": 44.89769499800862,
      "learning_rate": 1.9347820230782295e-09,
      "logits/chosen": -1.1586068868637085,
      "logits/rejected": -0.8781611323356628,
      "logps/chosen": -361.3426818847656,
      "logps/rejected": -399.6344299316406,
      "loss": 0.5109,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.9305132031440735,
      "rewards/margins": 0.8112867474555969,
      "rewards/rejected": -1.7417999505996704,
      "step": 460
    },
    {
      "epoch": 0.9837781266352695,
      "grad_norm": 22.89147164994492,
      "learning_rate": 3.2839470889836627e-10,
      "logits/chosen": -1.140649676322937,
      "logits/rejected": -0.9729598164558411,
      "logps/chosen": -378.35162353515625,
      "logps/rejected": -425.26666259765625,
      "loss": 0.4908,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.828622043132782,
      "rewards/margins": 0.7690635919570923,
      "rewards/rejected": -1.59768545627594,
      "step": 470
    },
    {
      "epoch": 0.9984301412872841,
      "eval_logits/chosen": -0.9970575571060181,
      "eval_logits/rejected": -0.8678091764450073,
      "eval_logps/chosen": -339.41973876953125,
      "eval_logps/rejected": -429.93389892578125,
      "eval_loss": 0.508111834526062,
      "eval_rewards/accuracies": 0.78125,
      "eval_rewards/chosen": -0.8088568449020386,
      "eval_rewards/margins": 0.9086635112762451,
      "eval_rewards/rejected": -1.7175202369689941,
      "eval_runtime": 167.8174,
      "eval_samples_per_second": 11.918,
      "eval_steps_per_second": 0.191,
      "step": 477
    },
    {
      "epoch": 0.9984301412872841,
      "step": 477,
      "total_flos": 0.0,
      "train_loss": 0.5463379294117542,
      "train_runtime": 13584.2132,
      "train_samples_per_second": 4.5,
      "train_steps_per_second": 0.035
    }
  ],
  "logging_steps": 10,
  "max_steps": 477,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}