|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9968652037617555, |
|
"eval_steps": 500, |
|
"global_step": 159, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006269592476489028, |
|
"grad_norm": 11.162558389320719, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": -2.832691192626953, |
|
"logits/rejected": -2.789004325866699, |
|
"logps/chosen": -379.2402648925781, |
|
"logps/pi_response": -84.25662994384766, |
|
"logps/ref_response": -84.25662994384766, |
|
"logps/rejected": -192.58773803710938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06269592476489028, |
|
"grad_norm": 9.199296960060826, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.779388427734375, |
|
"logits/rejected": -2.744753837585449, |
|
"logps/chosen": -243.76174926757812, |
|
"logps/pi_response": -68.54817962646484, |
|
"logps/ref_response": -68.44412231445312, |
|
"logps/rejected": -167.88645935058594, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 0.0011128478217869997, |
|
"rewards/margins": 0.0005714390426874161, |
|
"rewards/rejected": 0.0005414087790995836, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12539184952978055, |
|
"grad_norm": 9.622073702330978, |
|
"learning_rate": 4.990353313429303e-07, |
|
"logits/chosen": -2.7916176319122314, |
|
"logits/rejected": -2.7705063819885254, |
|
"logps/chosen": -232.59619140625, |
|
"logps/pi_response": -72.38710021972656, |
|
"logps/ref_response": -71.28221893310547, |
|
"logps/rejected": -165.4346466064453, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.030751097947359085, |
|
"rewards/margins": 0.021662823855876923, |
|
"rewards/rejected": 0.009088275022804737, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18808777429467086, |
|
"grad_norm": 8.334770075469805, |
|
"learning_rate": 4.882681251368548e-07, |
|
"logits/chosen": -2.7269303798675537, |
|
"logits/rejected": -2.6703150272369385, |
|
"logps/chosen": -238.45944213867188, |
|
"logps/pi_response": -80.09962463378906, |
|
"logps/ref_response": -64.93635559082031, |
|
"logps/rejected": -164.25949096679688, |
|
"loss": 0.6524, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.02072182670235634, |
|
"rewards/margins": 0.1209292858839035, |
|
"rewards/rejected": -0.10020747035741806, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2507836990595611, |
|
"grad_norm": 9.807608483085172, |
|
"learning_rate": 4.6604720940421207e-07, |
|
"logits/chosen": -2.65498948097229, |
|
"logits/rejected": -2.608666181564331, |
|
"logps/chosen": -271.5018615722656, |
|
"logps/pi_response": -114.51536560058594, |
|
"logps/ref_response": -69.87471008300781, |
|
"logps/rejected": -211.77072143554688, |
|
"loss": 0.6131, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.11142440885305405, |
|
"rewards/margins": 0.21302208304405212, |
|
"rewards/rejected": -0.32444649934768677, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.31347962382445144, |
|
"grad_norm": 14.498018136229003, |
|
"learning_rate": 4.3344075855595097e-07, |
|
"logits/chosen": -2.6647868156433105, |
|
"logits/rejected": -2.6350674629211426, |
|
"logps/chosen": -267.47705078125, |
|
"logps/pi_response": -130.49932861328125, |
|
"logps/ref_response": -67.23551177978516, |
|
"logps/rejected": -212.48483276367188, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.21161659061908722, |
|
"rewards/margins": 0.35675129294395447, |
|
"rewards/rejected": -0.5683678388595581, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3761755485893417, |
|
"grad_norm": 13.440157501390912, |
|
"learning_rate": 3.920161866827889e-07, |
|
"logits/chosen": -2.6458613872528076, |
|
"logits/rejected": -2.5944952964782715, |
|
"logps/chosen": -237.12710571289062, |
|
"logps/pi_response": -148.73211669921875, |
|
"logps/ref_response": -64.54133605957031, |
|
"logps/rejected": -239.72702026367188, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.36507314443588257, |
|
"rewards/margins": 0.4184595048427582, |
|
"rewards/rejected": -0.7835326194763184, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.438871473354232, |
|
"grad_norm": 15.982610593807811, |
|
"learning_rate": 3.4376480090239047e-07, |
|
"logits/chosen": -2.680788040161133, |
|
"logits/rejected": -2.6525492668151855, |
|
"logps/chosen": -269.2456970214844, |
|
"logps/pi_response": -195.72213745117188, |
|
"logps/ref_response": -69.08720397949219, |
|
"logps/rejected": -306.4203186035156, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5016717910766602, |
|
"rewards/margins": 0.7233041524887085, |
|
"rewards/rejected": -1.224975824356079, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5015673981191222, |
|
"grad_norm": 22.558501492450485, |
|
"learning_rate": 2.910060778827554e-07, |
|
"logits/chosen": -2.7134718894958496, |
|
"logits/rejected": -2.6782937049865723, |
|
"logps/chosen": -313.21136474609375, |
|
"logps/pi_response": -213.4176788330078, |
|
"logps/ref_response": -72.85678100585938, |
|
"logps/rejected": -303.14056396484375, |
|
"loss": 0.5063, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.6677788496017456, |
|
"rewards/margins": 0.8193937540054321, |
|
"rewards/rejected": -1.4871724843978882, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5642633228840125, |
|
"grad_norm": 21.710845718415612, |
|
"learning_rate": 2.3627616503391812e-07, |
|
"logits/chosen": -2.634028911590576, |
|
"logits/rejected": -2.608798027038574, |
|
"logps/chosen": -329.37799072265625, |
|
"logps/pi_response": -211.5565948486328, |
|
"logps/ref_response": -70.06621551513672, |
|
"logps/rejected": -340.3314514160156, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.7806206941604614, |
|
"rewards/margins": 0.7901986837387085, |
|
"rewards/rejected": -1.57081937789917, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6269592476489029, |
|
"grad_norm": 27.570197538418466, |
|
"learning_rate": 1.8220596619089573e-07, |
|
"logits/chosen": -2.6406660079956055, |
|
"logits/rejected": -2.584998369216919, |
|
"logps/chosen": -326.03961181640625, |
|
"logps/pi_response": -226.829833984375, |
|
"logps/ref_response": -68.18948364257812, |
|
"logps/rejected": -311.16668701171875, |
|
"loss": 0.4812, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.771049976348877, |
|
"rewards/margins": 0.8004252314567566, |
|
"rewards/rejected": -1.5714751482009888, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6896551724137931, |
|
"grad_norm": 25.855890266739543, |
|
"learning_rate": 1.3139467229135998e-07, |
|
"logits/chosen": -2.5621211528778076, |
|
"logits/rejected": -2.5297341346740723, |
|
"logps/chosen": -303.0425720214844, |
|
"logps/pi_response": -235.69052124023438, |
|
"logps/ref_response": -61.66025924682617, |
|
"logps/rejected": -347.60089111328125, |
|
"loss": 0.4747, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.0712878704071045, |
|
"rewards/margins": 0.6884299516677856, |
|
"rewards/rejected": -1.7597179412841797, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7523510971786834, |
|
"grad_norm": 25.63885329215509, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": -2.5914602279663086, |
|
"logits/rejected": -2.5509674549102783, |
|
"logps/chosen": -330.34613037109375, |
|
"logps/pi_response": -255.2554473876953, |
|
"logps/ref_response": -64.22006225585938, |
|
"logps/rejected": -360.2675476074219, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0213677883148193, |
|
"rewards/margins": 1.0237469673156738, |
|
"rewards/rejected": -2.045114517211914, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8150470219435737, |
|
"grad_norm": 26.281388822149008, |
|
"learning_rate": 4.904486005914027e-08, |
|
"logits/chosen": -2.5695884227752686, |
|
"logits/rejected": -2.527682065963745, |
|
"logps/chosen": -339.78851318359375, |
|
"logps/pi_response": -252.6923370361328, |
|
"logps/ref_response": -70.67754364013672, |
|
"logps/rejected": -348.1340026855469, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.8407719731330872, |
|
"rewards/margins": 1.1005146503448486, |
|
"rewards/rejected": -1.941286325454712, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.877742946708464, |
|
"grad_norm": 21.11421165620455, |
|
"learning_rate": 2.1464952759020856e-08, |
|
"logits/chosen": -2.5593600273132324, |
|
"logits/rejected": -2.521488904953003, |
|
"logps/chosen": -350.98797607421875, |
|
"logps/pi_response": -244.7444305419922, |
|
"logps/ref_response": -70.74293518066406, |
|
"logps/rejected": -367.61639404296875, |
|
"loss": 0.4479, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.0182468891143799, |
|
"rewards/margins": 0.9283415675163269, |
|
"rewards/rejected": -1.9465882778167725, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9404388714733543, |
|
"grad_norm": 34.097104034294276, |
|
"learning_rate": 4.8708793644441086e-09, |
|
"logits/chosen": -2.5964386463165283, |
|
"logits/rejected": -2.5334842205047607, |
|
"logps/chosen": -350.24432373046875, |
|
"logps/pi_response": -242.6420135498047, |
|
"logps/ref_response": -74.09484100341797, |
|
"logps/rejected": -348.6947021484375, |
|
"loss": 0.4642, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.9555439949035645, |
|
"rewards/margins": 0.8465269207954407, |
|
"rewards/rejected": -1.80207097530365, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9968652037617555, |
|
"step": 159, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5324955706326466, |
|
"train_runtime": 3623.407, |
|
"train_samples_per_second": 5.624, |
|
"train_steps_per_second": 0.044 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 159, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|