{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984301412872841,
  "eval_steps": 500,
  "global_step": 477,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0020931449502878076,
      "grad_norm": 9.420194644919581,
      "learning_rate": 1.0416666666666666e-08,
      "logits/chosen": -2.6372292041778564,
      "logits/rejected": -2.557051181793213,
      "logps/chosen": -303.9394836425781,
      "logps/rejected": -351.8717041015625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.020931449502878074,
      "grad_norm": 9.776596281600654,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -2.366276741027832,
      "logits/rejected": -2.316371202468872,
      "logps/chosen": -292.4997253417969,
      "logps/rejected": -264.6174621582031,
      "loss": 0.6931,
      "rewards/accuracies": 0.4583333432674408,
      "rewards/chosen": 1.6382920875912532e-05,
      "rewards/margins": 7.999181252671406e-05,
      "rewards/rejected": -6.360887346090749e-05,
      "step": 10
    },
    {
      "epoch": 0.04186289900575615,
      "grad_norm": 8.872984148971444,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -2.471057653427124,
      "logits/rejected": -2.376948833465576,
      "logps/chosen": -296.95635986328125,
      "logps/rejected": -272.93511962890625,
      "loss": 0.6917,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.005188685841858387,
      "rewards/margins": 0.0027170330286026,
      "rewards/rejected": 0.0024716525804251432,
      "step": 20
    },
    {
      "epoch": 0.06279434850863422,
      "grad_norm": 8.9584085433827,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.450611114501953,
      "logits/rejected": -2.3455896377563477,
      "logps/chosen": -278.2935791015625,
      "logps/rejected": -238.2044677734375,
      "loss": 0.6862,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": 0.03269987553358078,
      "rewards/margins": 0.013539234176278114,
      "rewards/rejected": 0.019160641357302666,
      "step": 30
    },
    {
      "epoch": 0.0837257980115123,
      "grad_norm": 9.348192963564985,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -2.361204147338867,
      "logits/rejected": -2.2996718883514404,
      "logps/chosen": -251.10452270507812,
      "logps/rejected": -253.61843872070312,
      "loss": 0.6712,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.04305989295244217,
      "rewards/margins": 0.048449400812387466,
      "rewards/rejected": -0.00538951251655817,
      "step": 40
    },
    {
      "epoch": 0.10465724751439037,
      "grad_norm": 10.542523800768048,
      "learning_rate": 4.999731868769026e-07,
      "logits/chosen": -2.4171504974365234,
      "logits/rejected": -2.3192880153656006,
      "logps/chosen": -267.4609375,
      "logps/rejected": -273.1936950683594,
      "loss": 0.6466,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.008090761490166187,
      "rewards/margins": 0.11023788154125214,
      "rewards/rejected": -0.10214712470769882,
      "step": 50
    },
    {
      "epoch": 0.12558869701726844,
      "grad_norm": 19.049765193058207,
      "learning_rate": 4.990353313429303e-07,
      "logits/chosen": -2.451169490814209,
      "logits/rejected": -2.373406410217285,
      "logps/chosen": -257.0067138671875,
      "logps/rejected": -271.317626953125,
      "loss": 0.6146,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.11953003704547882,
      "rewards/margins": 0.22274689376354218,
      "rewards/rejected": -0.342276930809021,
      "step": 60
    },
    {
      "epoch": 0.14652014652014653,
      "grad_norm": 17.106774713296346,
      "learning_rate": 4.967625656594781e-07,
      "logits/chosen": -2.3775253295898438,
      "logits/rejected": -2.3367419242858887,
      "logps/chosen": -305.51055908203125,
      "logps/rejected": -324.1693115234375,
      "loss": 0.5932,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.2125539779663086,
      "rewards/margins": 0.3053613603115082,
      "rewards/rejected": -0.5179153680801392,
      "step": 70
    },
    {
      "epoch": 0.1674515960230246,
      "grad_norm": 22.00744385125915,
      "learning_rate": 4.93167072587771e-07,
      "logits/chosen": -2.502345561981201,
      "logits/rejected": -2.377171277999878,
      "logps/chosen": -355.249267578125,
      "logps/rejected": -326.4585876464844,
      "loss": 0.5855,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.3869813084602356,
      "rewards/margins": 0.3918095827102661,
      "rewards/rejected": -0.7787908315658569,
      "step": 80
    },
    {
      "epoch": 0.18838304552590268,
      "grad_norm": 16.53408344392586,
      "learning_rate": 4.882681251368548e-07,
      "logits/chosen": -2.398308277130127,
      "logits/rejected": -2.338383197784424,
      "logps/chosen": -301.67816162109375,
      "logps/rejected": -352.0071716308594,
      "loss": 0.5635,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.5751432180404663,
      "rewards/margins": 0.4532225728034973,
      "rewards/rejected": -1.0283657312393188,
      "step": 90
    },
    {
      "epoch": 0.20931449502878074,
      "grad_norm": 20.576427446405972,
      "learning_rate": 4.820919832540181e-07,
      "logits/chosen": -2.3533272743225098,
      "logits/rejected": -2.2900137901306152,
      "logps/chosen": -348.10467529296875,
      "logps/rejected": -388.0940246582031,
      "loss": 0.5631,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.6305854320526123,
      "rewards/margins": 0.5525855422019958,
      "rewards/rejected": -1.183171033859253,
      "step": 100
    },
    {
      "epoch": 0.2302459445316588,
      "grad_norm": 20.106003054090923,
      "learning_rate": 4.7467175306295647e-07,
      "logits/chosen": -2.4152920246124268,
      "logits/rejected": -2.345858573913574,
      "logps/chosen": -330.9453125,
      "logps/rejected": -357.2662353515625,
      "loss": 0.575,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.4834519028663635,
      "rewards/margins": 0.4216728210449219,
      "rewards/rejected": -0.9051246643066406,
      "step": 110
    },
    {
      "epoch": 0.25117739403453687,
      "grad_norm": 17.544100089861768,
      "learning_rate": 4.6604720940421207e-07,
      "logits/chosen": -2.3248140811920166,
      "logits/rejected": -2.3209547996520996,
      "logps/chosen": -308.89434814453125,
      "logps/rejected": -358.4286193847656,
      "loss": 0.5561,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.4968286156654358,
      "rewards/margins": 0.5540732145309448,
      "rewards/rejected": -1.0509017705917358,
      "step": 120
    },
    {
      "epoch": 0.272108843537415,
      "grad_norm": 17.72276209068045,
      "learning_rate": 4.5626458262912735e-07,
      "logits/chosen": -2.3460891246795654,
      "logits/rejected": -2.315985918045044,
      "logps/chosen": -341.44708251953125,
      "logps/rejected": -379.0929260253906,
      "loss": 0.536,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.7040597200393677,
      "rewards/margins": 0.48738259077072144,
      "rewards/rejected": -1.1914422512054443,
      "step": 130
    },
    {
      "epoch": 0.29304029304029305,
      "grad_norm": 21.32563247174665,
      "learning_rate": 4.453763107901675e-07,
      "logits/chosen": -2.328991174697876,
      "logits/rejected": -2.274400472640991,
      "logps/chosen": -354.708984375,
      "logps/rejected": -377.31011962890625,
      "loss": 0.5535,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.506158173084259,
      "rewards/margins": 0.6448914408683777,
      "rewards/rejected": -1.1510496139526367,
      "step": 140
    },
    {
      "epoch": 0.3139717425431711,
      "grad_norm": 25.439845700518678,
      "learning_rate": 4.3344075855595097e-07,
      "logits/chosen": -2.2812905311584473,
      "logits/rejected": -2.228199005126953,
      "logps/chosen": -332.77044677734375,
      "logps/rejected": -350.35162353515625,
      "loss": 0.5502,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.6236374974250793,
      "rewards/margins": 0.47987040877342224,
      "rewards/rejected": -1.1035077571868896,
      "step": 150
    },
    {
      "epoch": 0.3349031920460492,
      "grad_norm": 22.437229700307615,
      "learning_rate": 4.2052190435769554e-07,
      "logits/chosen": -2.3055479526519775,
      "logits/rejected": -2.2253096103668213,
      "logps/chosen": -341.20880126953125,
      "logps/rejected": -384.2608337402344,
      "loss": 0.5544,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.743469774723053,
      "rewards/margins": 0.6333221793174744,
      "rewards/rejected": -1.3767919540405273,
      "step": 160
    },
    {
      "epoch": 0.35583464154892724,
      "grad_norm": 22.375265306667423,
      "learning_rate": 4.0668899744407567e-07,
      "logits/chosen": -2.170342206954956,
      "logits/rejected": -2.1414666175842285,
      "logps/chosen": -315.53131103515625,
      "logps/rejected": -349.3824462890625,
      "loss": 0.5418,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.647168755531311,
      "rewards/margins": 0.554858386516571,
      "rewards/rejected": -1.2020272016525269,
      "step": 170
    },
    {
      "epoch": 0.37676609105180536,
      "grad_norm": 22.685803060689793,
      "learning_rate": 3.920161866827889e-07,
      "logits/chosen": -2.2532403469085693,
      "logits/rejected": -2.2120420932769775,
      "logps/chosen": -348.15325927734375,
      "logps/rejected": -376.5264587402344,
      "loss": 0.5167,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.8467988967895508,
      "rewards/margins": 0.5318444967269897,
      "rewards/rejected": -1.3786433935165405,
      "step": 180
    },
    {
      "epoch": 0.3976975405546834,
      "grad_norm": 23.346233993869507,
      "learning_rate": 3.765821230985757e-07,
      "logits/chosen": -2.181307315826416,
      "logits/rejected": -2.1494665145874023,
      "logps/chosen": -329.13287353515625,
      "logps/rejected": -377.4722595214844,
      "loss": 0.5433,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.7779133915901184,
      "rewards/margins": 0.5419718027114868,
      "rewards/rejected": -1.319885015487671,
      "step": 190
    },
    {
      "epoch": 0.4186289900575615,
      "grad_norm": 59.244174530672744,
      "learning_rate": 3.604695382782159e-07,
      "logits/chosen": -2.183567762374878,
      "logits/rejected": -2.1823012828826904,
      "logps/chosen": -307.64337158203125,
      "logps/rejected": -381.5365905761719,
      "loss": 0.5493,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.6649466753005981,
      "rewards/margins": 0.5270031094551086,
      "rewards/rejected": -1.1919496059417725,
      "step": 200
    },
    {
      "epoch": 0.43956043956043955,
      "grad_norm": 21.4577312447254,
      "learning_rate": 3.4376480090239047e-07,
      "logits/chosen": -2.2622885704040527,
      "logits/rejected": -2.175058603286743,
      "logps/chosen": -374.94775390625,
      "logps/rejected": -381.5616760253906,
      "loss": 0.5386,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.7791308164596558,
      "rewards/margins": 0.5941972732543945,
      "rewards/rejected": -1.3733280897140503,
      "step": 210
    },
    {
      "epoch": 0.4604918890633176,
      "grad_norm": 27.605297631673444,
      "learning_rate": 3.265574537815398e-07,
      "logits/chosen": -2.142841339111328,
      "logits/rejected": -2.153688907623291,
      "logps/chosen": -322.2380065917969,
      "logps/rejected": -390.0839538574219,
      "loss": 0.5304,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.9062248468399048,
      "rewards/margins": 0.6386878490447998,
      "rewards/rejected": -1.544912576675415,
      "step": 220
    },
    {
      "epoch": 0.48142333856619574,
      "grad_norm": 25.98305182876635,
      "learning_rate": 3.0893973387735683e-07,
      "logits/chosen": -2.246525526046753,
      "logits/rejected": -2.182516098022461,
      "logps/chosen": -357.104736328125,
      "logps/rejected": -429.77337646484375,
      "loss": 0.5391,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -1.0580464601516724,
      "rewards/margins": 0.896924614906311,
      "rewards/rejected": -1.9549709558486938,
      "step": 230
    },
    {
      "epoch": 0.5023547880690737,
      "grad_norm": 21.13667543588383,
      "learning_rate": 2.910060778827554e-07,
      "logits/chosen": -2.2703707218170166,
      "logits/rejected": -2.204939126968384,
      "logps/chosen": -368.1454162597656,
      "logps/rejected": -415.8702697753906,
      "loss": 0.5053,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.9035312533378601,
      "rewards/margins": 0.7343918085098267,
      "rewards/rejected": -1.637923002243042,
      "step": 240
    },
    {
      "epoch": 0.5232862375719518,
      "grad_norm": 25.84731298493337,
      "learning_rate": 2.7285261601056697e-07,
      "logits/chosen": -2.19517183303833,
      "logits/rejected": -2.1099765300750732,
      "logps/chosen": -365.8097229003906,
      "logps/rejected": -419.41497802734375,
      "loss": 0.5115,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.9770700335502625,
      "rewards/margins": 0.9047032594680786,
      "rewards/rejected": -1.8817732334136963,
      "step": 250
    },
    {
      "epoch": 0.54421768707483,
      "grad_norm": 30.85559229261849,
      "learning_rate": 2.5457665670441937e-07,
      "logits/chosen": -1.9430633783340454,
      "logits/rejected": -1.9308143854141235,
      "logps/chosen": -361.0708923339844,
      "logps/rejected": -423.67242431640625,
      "loss": 0.5074,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.0502084493637085,
      "rewards/margins": 0.7874974012374878,
      "rewards/rejected": -1.8377059698104858,
      "step": 260
    },
    {
      "epoch": 0.565149136577708,
      "grad_norm": 24.551312599118287,
      "learning_rate": 2.3627616503391812e-07,
      "logits/chosen": -1.6331418752670288,
      "logits/rejected": -1.550295352935791,
      "logps/chosen": -384.88031005859375,
      "logps/rejected": -451.5082092285156,
      "loss": 0.5198,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.0247470140457153,
      "rewards/margins": 0.8930918574333191,
      "rewards/rejected": -1.9178390502929688,
      "step": 270
    },
    {
      "epoch": 0.5860805860805861,
      "grad_norm": 29.74692013409924,
      "learning_rate": 2.1804923757009882e-07,
      "logits/chosen": -1.4030331373214722,
      "logits/rejected": -1.2896840572357178,
      "logps/chosen": -341.4692077636719,
      "logps/rejected": -389.22454833984375,
      "loss": 0.5213,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.9064887762069702,
      "rewards/margins": 0.7257590293884277,
      "rewards/rejected": -1.6322479248046875,
      "step": 280
    },
    {
      "epoch": 0.6070120355834642,
      "grad_norm": 26.977158157494642,
      "learning_rate": 1.9999357655598891e-07,
      "logits/chosen": -1.4508934020996094,
      "logits/rejected": -1.3269082307815552,
      "logps/chosen": -345.9167175292969,
      "logps/rejected": -418.6991271972656,
      "loss": 0.5151,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -1.0557668209075928,
      "rewards/margins": 0.7933701276779175,
      "rewards/rejected": -1.8491369485855103,
      "step": 290
    },
    {
      "epoch": 0.6279434850863422,
      "grad_norm": 25.588639007041994,
      "learning_rate": 1.8220596619089573e-07,
      "logits/chosen": -1.3906519412994385,
      "logits/rejected": -1.234431505203247,
      "logps/chosen": -424.56378173828125,
      "logps/rejected": -464.31463623046875,
      "loss": 0.488,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.1725590229034424,
      "rewards/margins": 0.8173145055770874,
      "rewards/rejected": -1.9898736476898193,
      "step": 300
    },
    {
      "epoch": 0.6488749345892203,
      "grad_norm": 35.84688001545634,
      "learning_rate": 1.647817538357072e-07,
      "logits/chosen": -1.2901729345321655,
      "logits/rejected": -1.141137719154358,
      "logps/chosen": -441.1419982910156,
      "logps/rejected": -498.540283203125,
      "loss": 0.4995,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -1.4990507364273071,
      "rewards/margins": 1.0634124279022217,
      "rewards/rejected": -2.5624632835388184,
      "step": 310
    },
    {
      "epoch": 0.6698063840920984,
      "grad_norm": 37.73844743050267,
      "learning_rate": 1.478143389201113e-07,
      "logits/chosen": -1.053849458694458,
      "logits/rejected": -0.8557470440864563,
      "logps/chosen": -397.40447998046875,
      "logps/rejected": -475.0392150878906,
      "loss": 0.4813,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -1.461068868637085,
      "rewards/margins": 1.0757315158843994,
      "rewards/rejected": -2.5368001461029053,
      "step": 320
    },
    {
      "epoch": 0.6907378335949764,
      "grad_norm": 26.325601270856676,
      "learning_rate": 1.3139467229135998e-07,
      "logits/chosen": -1.1438688039779663,
      "logits/rejected": -1.1061335802078247,
      "logps/chosen": -375.85955810546875,
      "logps/rejected": -456.5414123535156,
      "loss": 0.4993,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.1525086164474487,
      "rewards/margins": 0.8357551693916321,
      "rewards/rejected": -1.988263726234436,
      "step": 330
    },
    {
      "epoch": 0.7116692830978545,
      "grad_norm": 37.090497017079386,
      "learning_rate": 1.1561076868822755e-07,
      "logits/chosen": -0.9682319760322571,
      "logits/rejected": -0.8497031927108765,
      "logps/chosen": -403.8167419433594,
      "logps/rejected": -476.2872009277344,
      "loss": 0.4848,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.249403953552246,
      "rewards/margins": 0.9320135116577148,
      "rewards/rejected": -2.181417226791382,
      "step": 340
    },
    {
      "epoch": 0.7326007326007326,
      "grad_norm": 29.0205923314937,
      "learning_rate": 1.0054723495346482e-07,
      "logits/chosen": -0.8897636532783508,
      "logits/rejected": -0.6620567440986633,
      "logps/chosen": -401.22882080078125,
      "logps/rejected": -475.3289489746094,
      "loss": 0.4573,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -1.493096113204956,
      "rewards/margins": 0.9968076944351196,
      "rewards/rejected": -2.489903688430786,
      "step": 350
    },
    {
      "epoch": 0.7535321821036107,
      "grad_norm": 28.465041930216806,
      "learning_rate": 8.628481651367875e-08,
      "logits/chosen": -0.8191855549812317,
      "logits/rejected": -0.5711785554885864,
      "logps/chosen": -448.5098571777344,
      "logps/rejected": -526.4882202148438,
      "loss": 0.5036,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -1.5041682720184326,
      "rewards/margins": 1.1857599020004272,
      "rewards/rejected": -2.6899282932281494,
      "step": 360
    },
    {
      "epoch": 0.7744636316064888,
      "grad_norm": 29.111893282608257,
      "learning_rate": 7.289996455765748e-08,
      "logits/chosen": -0.8937414288520813,
      "logits/rejected": -0.7008529901504517,
      "logps/chosen": -409.792724609375,
      "logps/rejected": -478.1075134277344,
      "loss": 0.5009,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -1.5043065547943115,
      "rewards/margins": 0.9667744636535645,
      "rewards/rejected": -2.471081018447876,
      "step": 370
    },
    {
      "epoch": 0.7953950811093669,
      "grad_norm": 29.70871758620533,
      "learning_rate": 6.046442623320145e-08,
      "logits/chosen": -0.675798237323761,
      "logits/rejected": -0.5979479551315308,
      "logps/chosen": -405.40380859375,
      "logps/rejected": -542.0068969726562,
      "loss": 0.4974,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -1.6684430837631226,
      "rewards/margins": 1.1851465702056885,
      "rewards/rejected": -2.8535895347595215,
      "step": 380
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 36.269601768258234,
      "learning_rate": 4.904486005914027e-08,
      "logits/chosen": -0.9742690920829773,
      "logits/rejected": -0.7953445315361023,
      "logps/chosen": -488.8130798339844,
      "logps/rejected": -539.947021484375,
      "loss": 0.4893,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.606419324874878,
      "rewards/margins": 0.8702549934387207,
      "rewards/rejected": -2.4766743183135986,
      "step": 390
    },
    {
      "epoch": 0.837257980115123,
      "grad_norm": 30.05583737574385,
      "learning_rate": 3.8702478614051345e-08,
      "logits/chosen": -0.8497405052185059,
      "logits/rejected": -0.6598803400993347,
      "logps/chosen": -398.9614562988281,
      "logps/rejected": -482.28302001953125,
      "loss": 0.5016,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.5102481842041016,
      "rewards/margins": 0.9544359445571899,
      "rewards/rejected": -2.464684009552002,
      "step": 400
    },
    {
      "epoch": 0.858189429618001,
      "grad_norm": 27.172370132213754,
      "learning_rate": 2.9492720416985e-08,
      "logits/chosen": -1.1038849353790283,
      "logits/rejected": -0.9001197814941406,
      "logps/chosen": -444.0462951660156,
      "logps/rejected": -514.1170654296875,
      "loss": 0.5039,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -1.5119292736053467,
      "rewards/margins": 1.0207693576812744,
      "rewards/rejected": -2.5326988697052,
      "step": 410
    },
    {
      "epoch": 0.8791208791208791,
      "grad_norm": 30.512211155868616,
      "learning_rate": 2.1464952759020856e-08,
      "logits/chosen": -0.9595249891281128,
      "logits/rejected": -0.8507660627365112,
      "logps/chosen": -405.39862060546875,
      "logps/rejected": -510.281494140625,
      "loss": 0.4905,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.5675805807113647,
      "rewards/margins": 1.0063538551330566,
      "rewards/rejected": -2.573934555053711,
      "step": 420
    },
    {
      "epoch": 0.9000523286237572,
      "grad_norm": 28.009936036028865,
      "learning_rate": 1.4662207078575684e-08,
      "logits/chosen": -0.9552485346794128,
      "logits/rejected": -0.760822057723999,
      "logps/chosen": -430.9397888183594,
      "logps/rejected": -507.698486328125,
      "loss": 0.4751,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -1.497207522392273,
      "rewards/margins": 0.9993329048156738,
      "rewards/rejected": -2.4965405464172363,
      "step": 430
    },
    {
      "epoch": 0.9209837781266352,
      "grad_norm": 29.996752006101683,
      "learning_rate": 9.12094829893642e-09,
      "logits/chosen": -1.0940220355987549,
      "logits/rejected": -0.9033029675483704,
      "logps/chosen": -399.84954833984375,
      "logps/rejected": -462.63153076171875,
      "loss": 0.4862,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.4471704959869385,
      "rewards/margins": 0.9819791913032532,
      "rewards/rejected": -2.429149866104126,
      "step": 440
    },
    {
      "epoch": 0.9419152276295133,
      "grad_norm": 30.711855612709723,
      "learning_rate": 4.8708793644441086e-09,
      "logits/chosen": -0.8034582138061523,
      "logits/rejected": -0.6741777658462524,
      "logps/chosen": -414.15447998046875,
      "logps/rejected": -519.9739990234375,
      "loss": 0.4883,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -1.5021246671676636,
      "rewards/margins": 1.1551309823989868,
      "rewards/rejected": -2.6572554111480713,
      "step": 450
    },
    {
      "epoch": 0.9628466771323915,
      "grad_norm": 40.76374555079479,
      "learning_rate": 1.9347820230782295e-09,
      "logits/chosen": -0.977044403553009,
      "logits/rejected": -0.7038423418998718,
      "logps/chosen": -423.5811462402344,
      "logps/rejected": -475.4755859375,
      "loss": 0.4981,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.5915157794952393,
      "rewards/margins": 0.929116427898407,
      "rewards/rejected": -2.520632266998291,
      "step": 460
    },
    {
      "epoch": 0.9837781266352695,
      "grad_norm": 26.303634709848364,
      "learning_rate": 3.2839470889836627e-10,
      "logits/chosen": -0.9590628743171692,
      "logits/rejected": -0.8002158403396606,
      "logps/chosen": -434.5794982910156,
      "logps/rejected": -501.19775390625,
      "loss": 0.4747,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -1.4538209438323975,
      "rewards/margins": 0.9469608068466187,
      "rewards/rejected": -2.4007816314697266,
      "step": 470
    },
    {
      "epoch": 0.9984301412872841,
      "eval_logits/chosen": -0.8246170282363892,
      "eval_logits/rejected": -0.6947768330574036,
      "eval_logps/chosen": -401.7669677734375,
      "eval_logps/rejected": -508.43353271484375,
      "eval_loss": 0.4935465157032013,
      "eval_rewards/accuracies": 0.78515625,
      "eval_rewards/chosen": -1.4744462966918945,
      "eval_rewards/margins": 1.0516830682754517,
      "eval_rewards/rejected": -2.5261292457580566,
      "eval_runtime": 167.9754,
      "eval_samples_per_second": 11.907,
      "eval_steps_per_second": 0.191,
      "step": 477
    },
    {
      "epoch": 0.9984301412872841,
      "step": 477,
      "total_flos": 0.0,
      "train_loss": 0.537537020957195,
      "train_runtime": 15566.6343,
      "train_samples_per_second": 3.927,
      "train_steps_per_second": 0.031
    }
  ],
  "logging_steps": 10,
  "max_steps": 477,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}