{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9968652037617555,
"eval_steps": 500,
"global_step": 159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006269592476489028,
"grad_norm": 11.162558389320719,
"learning_rate": 3.125e-08,
"logits/chosen": -2.832691192626953,
"logits/rejected": -2.789004325866699,
"logps/chosen": -379.2402648925781,
"logps/pi_response": -84.25662994384766,
"logps/ref_response": -84.25662994384766,
"logps/rejected": -192.58773803710938,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06269592476489028,
"grad_norm": 9.199296960060826,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.779388427734375,
"logits/rejected": -2.744753837585449,
"logps/chosen": -243.76174926757812,
"logps/pi_response": -68.54817962646484,
"logps/ref_response": -68.44412231445312,
"logps/rejected": -167.88645935058594,
"loss": 0.6927,
"rewards/accuracies": 0.4444444477558136,
"rewards/chosen": 0.0011128478217869997,
"rewards/margins": 0.0005714390426874161,
"rewards/rejected": 0.0005414087790995836,
"step": 10
},
{
"epoch": 0.12539184952978055,
"grad_norm": 9.622073702330978,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -2.7916176319122314,
"logits/rejected": -2.7705063819885254,
"logps/chosen": -232.59619140625,
"logps/pi_response": -72.38710021972656,
"logps/ref_response": -71.28221893310547,
"logps/rejected": -165.4346466064453,
"loss": 0.6786,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": 0.030751097947359085,
"rewards/margins": 0.021662823855876923,
"rewards/rejected": 0.009088275022804737,
"step": 20
},
{
"epoch": 0.18808777429467086,
"grad_norm": 8.334770075469805,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -2.7269303798675537,
"logits/rejected": -2.6703150272369385,
"logps/chosen": -238.45944213867188,
"logps/pi_response": -80.09962463378906,
"logps/ref_response": -64.93635559082031,
"logps/rejected": -164.25949096679688,
"loss": 0.6524,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.02072182670235634,
"rewards/margins": 0.1209292858839035,
"rewards/rejected": -0.10020747035741806,
"step": 30
},
{
"epoch": 0.2507836990595611,
"grad_norm": 9.807608483085172,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -2.65498948097229,
"logits/rejected": -2.608666181564331,
"logps/chosen": -271.5018615722656,
"logps/pi_response": -114.51536560058594,
"logps/ref_response": -69.87471008300781,
"logps/rejected": -211.77072143554688,
"loss": 0.6131,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.11142440885305405,
"rewards/margins": 0.21302208304405212,
"rewards/rejected": -0.32444649934768677,
"step": 40
},
{
"epoch": 0.31347962382445144,
"grad_norm": 14.498018136229003,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -2.6647868156433105,
"logits/rejected": -2.6350674629211426,
"logps/chosen": -267.47705078125,
"logps/pi_response": -130.49932861328125,
"logps/ref_response": -67.23551177978516,
"logps/rejected": -212.48483276367188,
"loss": 0.5801,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.21161659061908722,
"rewards/margins": 0.35675129294395447,
"rewards/rejected": -0.5683678388595581,
"step": 50
},
{
"epoch": 0.3761755485893417,
"grad_norm": 13.440157501390912,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -2.6458613872528076,
"logits/rejected": -2.5944952964782715,
"logps/chosen": -237.12710571289062,
"logps/pi_response": -148.73211669921875,
"logps/ref_response": -64.54133605957031,
"logps/rejected": -239.72702026367188,
"loss": 0.5536,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.36507314443588257,
"rewards/margins": 0.4184595048427582,
"rewards/rejected": -0.7835326194763184,
"step": 60
},
{
"epoch": 0.438871473354232,
"grad_norm": 15.982610593807811,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -2.680788040161133,
"logits/rejected": -2.6525492668151855,
"logps/chosen": -269.2456970214844,
"logps/pi_response": -195.72213745117188,
"logps/ref_response": -69.08720397949219,
"logps/rejected": -306.4203186035156,
"loss": 0.5003,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.5016717910766602,
"rewards/margins": 0.7233041524887085,
"rewards/rejected": -1.224975824356079,
"step": 70
},
{
"epoch": 0.5015673981191222,
"grad_norm": 22.558501492450485,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -2.7134718894958496,
"logits/rejected": -2.6782937049865723,
"logps/chosen": -313.21136474609375,
"logps/pi_response": -213.4176788330078,
"logps/ref_response": -72.85678100585938,
"logps/rejected": -303.14056396484375,
"loss": 0.5063,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": -0.6677788496017456,
"rewards/margins": 0.8193937540054321,
"rewards/rejected": -1.4871724843978882,
"step": 80
},
{
"epoch": 0.5642633228840125,
"grad_norm": 21.710845718415612,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -2.634028911590576,
"logits/rejected": -2.608798027038574,
"logps/chosen": -329.37799072265625,
"logps/pi_response": -211.5565948486328,
"logps/ref_response": -70.06621551513672,
"logps/rejected": -340.3314514160156,
"loss": 0.4772,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.7806206941604614,
"rewards/margins": 0.7901986837387085,
"rewards/rejected": -1.57081937789917,
"step": 90
},
{
"epoch": 0.6269592476489029,
"grad_norm": 27.570197538418466,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -2.6406660079956055,
"logits/rejected": -2.584998369216919,
"logps/chosen": -326.03961181640625,
"logps/pi_response": -226.829833984375,
"logps/ref_response": -68.18948364257812,
"logps/rejected": -311.16668701171875,
"loss": 0.4812,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -0.771049976348877,
"rewards/margins": 0.8004252314567566,
"rewards/rejected": -1.5714751482009888,
"step": 100
},
{
"epoch": 0.6896551724137931,
"grad_norm": 25.855890266739543,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -2.5621211528778076,
"logits/rejected": -2.5297341346740723,
"logps/chosen": -303.0425720214844,
"logps/pi_response": -235.69052124023438,
"logps/ref_response": -61.66025924682617,
"logps/rejected": -347.60089111328125,
"loss": 0.4747,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -1.0712878704071045,
"rewards/margins": 0.6884299516677856,
"rewards/rejected": -1.7597179412841797,
"step": 110
},
{
"epoch": 0.7523510971786834,
"grad_norm": 25.63885329215509,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -2.5914602279663086,
"logits/rejected": -2.5509674549102783,
"logps/chosen": -330.34613037109375,
"logps/pi_response": -255.2554473876953,
"logps/ref_response": -64.22006225585938,
"logps/rejected": -360.2675476074219,
"loss": 0.4799,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -1.0213677883148193,
"rewards/margins": 1.0237469673156738,
"rewards/rejected": -2.045114517211914,
"step": 120
},
{
"epoch": 0.8150470219435737,
"grad_norm": 26.281388822149008,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -2.5695884227752686,
"logits/rejected": -2.527682065963745,
"logps/chosen": -339.78851318359375,
"logps/pi_response": -252.6923370361328,
"logps/ref_response": -70.67754364013672,
"logps/rejected": -348.1340026855469,
"loss": 0.4568,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.8407719731330872,
"rewards/margins": 1.1005146503448486,
"rewards/rejected": -1.941286325454712,
"step": 130
},
{
"epoch": 0.877742946708464,
"grad_norm": 21.11421165620455,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -2.5593600273132324,
"logits/rejected": -2.521488904953003,
"logps/chosen": -350.98797607421875,
"logps/pi_response": -244.7444305419922,
"logps/ref_response": -70.74293518066406,
"logps/rejected": -367.61639404296875,
"loss": 0.4479,
"rewards/accuracies": 0.8062499761581421,
"rewards/chosen": -1.0182468891143799,
"rewards/margins": 0.9283415675163269,
"rewards/rejected": -1.9465882778167725,
"step": 140
},
{
"epoch": 0.9404388714733543,
"grad_norm": 34.097104034294276,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -2.5964386463165283,
"logits/rejected": -2.5334842205047607,
"logps/chosen": -350.24432373046875,
"logps/pi_response": -242.6420135498047,
"logps/ref_response": -74.09484100341797,
"logps/rejected": -348.6947021484375,
"loss": 0.4642,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -0.9555439949035645,
"rewards/margins": 0.8465269207954407,
"rewards/rejected": -1.80207097530365,
"step": 150
},
{
"epoch": 0.9968652037617555,
"step": 159,
"total_flos": 0.0,
"train_loss": 0.5324955706326466,
"train_runtime": 3623.407,
"train_samples_per_second": 5.624,
"train_steps_per_second": 0.044
}
],
"logging_steps": 10,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}