|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 478, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02092050209205021, |
|
"grad_norm": 1.8475663661956787, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": 0.3203125, |
|
"logits/rejected": 0.400390625, |
|
"logps/chosen": -1.34375, |
|
"logps/rejected": -1.4140625, |
|
"loss": 0.7551, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.34375, |
|
"rewards/margins": 0.07421875, |
|
"rewards/rejected": -1.4140625, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04184100418410042, |
|
"grad_norm": 1.5401519536972046, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": 0.310546875, |
|
"logits/rejected": 0.34765625, |
|
"logps/chosen": -1.34375, |
|
"logps/rejected": -1.4453125, |
|
"loss": 0.7511, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -1.34375, |
|
"rewards/margins": 0.09521484375, |
|
"rewards/rejected": -1.4453125, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06276150627615062, |
|
"grad_norm": 0.9768715500831604, |
|
"learning_rate": 9.995691082675907e-07, |
|
"logits/chosen": 0.427734375, |
|
"logits/rejected": 0.4609375, |
|
"logps/chosen": -1.3125, |
|
"logps/rejected": -1.5234375, |
|
"loss": 0.7174, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.3125, |
|
"rewards/margins": 0.21484375, |
|
"rewards/rejected": -1.5234375, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08368200836820083, |
|
"grad_norm": 1.1228318214416504, |
|
"learning_rate": 9.969385700404345e-07, |
|
"logits/chosen": 0.296875, |
|
"logits/rejected": 0.357421875, |
|
"logps/chosen": -1.2734375, |
|
"logps/rejected": -1.40625, |
|
"loss": 0.7385, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": -1.2734375, |
|
"rewards/margins": 0.13671875, |
|
"rewards/rejected": -1.40625, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10460251046025104, |
|
"grad_norm": 1.303710699081421, |
|
"learning_rate": 9.91929453572245e-07, |
|
"logits/chosen": 0.302734375, |
|
"logits/rejected": 0.42578125, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.359375, |
|
"loss": 0.7204, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -1.078125, |
|
"rewards/margins": 0.28125, |
|
"rewards/rejected": -1.359375, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12552301255230125, |
|
"grad_norm": 1.6195344924926758, |
|
"learning_rate": 9.845657348152955e-07, |
|
"logits/chosen": 0.30078125, |
|
"logits/rejected": 0.376953125, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.34375, |
|
"loss": 0.7488, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.171875, |
|
"rewards/margins": 0.169921875, |
|
"rewards/rejected": -1.34375, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14644351464435146, |
|
"grad_norm": 0.9580543041229248, |
|
"learning_rate": 9.748826599393632e-07, |
|
"logits/chosen": 0.1787109375, |
|
"logits/rejected": 0.25, |
|
"logps/chosen": -1.078125, |
|
"logps/rejected": -1.3984375, |
|
"loss": 0.6996, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.078125, |
|
"rewards/margins": 0.314453125, |
|
"rewards/rejected": -1.3984375, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16736401673640167, |
|
"grad_norm": 1.0547847747802734, |
|
"learning_rate": 9.629265766272291e-07, |
|
"logits/chosen": 0.271484375, |
|
"logits/rejected": 0.380859375, |
|
"logps/chosen": -1.125, |
|
"logps/rejected": -1.328125, |
|
"loss": 0.7164, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.125, |
|
"rewards/margins": 0.2001953125, |
|
"rewards/rejected": -1.328125, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18828451882845187, |
|
"grad_norm": 1.3746100664138794, |
|
"learning_rate": 9.487547122331964e-07, |
|
"logits/chosen": 0.259765625, |
|
"logits/rejected": 0.369140625, |
|
"logps/chosen": -1.140625, |
|
"logps/rejected": -1.3515625, |
|
"loss": 0.72, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": -1.140625, |
|
"rewards/margins": 0.212890625, |
|
"rewards/rejected": -1.3515625, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20920502092050208, |
|
"grad_norm": 1.3906371593475342, |
|
"learning_rate": 9.324348998664548e-07, |
|
"logits/chosen": 0.3125, |
|
"logits/rejected": 0.396484375, |
|
"logps/chosen": -1.1328125, |
|
"logps/rejected": -1.359375, |
|
"loss": 0.7105, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -1.1328125, |
|
"rewards/margins": 0.2236328125, |
|
"rewards/rejected": -1.359375, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2301255230125523, |
|
"grad_norm": 1.1979008913040161, |
|
"learning_rate": 9.140452537103941e-07, |
|
"logits/chosen": 0.310546875, |
|
"logits/rejected": 0.380859375, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.3046875, |
|
"loss": 0.7282, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -1.15625, |
|
"rewards/margins": 0.150390625, |
|
"rewards/rejected": -1.3046875, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2510460251046025, |
|
"grad_norm": 1.7712326049804688, |
|
"learning_rate": 8.936737951319275e-07, |
|
"logits/chosen": 0.287109375, |
|
"logits/rejected": 0.40234375, |
|
"logps/chosen": -1.109375, |
|
"logps/rejected": -1.375, |
|
"loss": 0.7061, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -1.109375, |
|
"rewards/margins": 0.267578125, |
|
"rewards/rejected": -1.375, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2719665271966527, |
|
"grad_norm": 1.0993026494979858, |
|
"learning_rate": 8.714180313704489e-07, |
|
"logits/chosen": 0.310546875, |
|
"logits/rejected": 0.43359375, |
|
"logps/chosen": -1.171875, |
|
"logps/rejected": -1.4609375, |
|
"loss": 0.7141, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -1.171875, |
|
"rewards/margins": 0.2890625, |
|
"rewards/rejected": -1.4609375, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2928870292887029, |
|
"grad_norm": 1.2873952388763428, |
|
"learning_rate": 8.473844888230064e-07, |
|
"logits/chosen": 0.388671875, |
|
"logits/rejected": 0.484375, |
|
"logps/chosen": -1.2109375, |
|
"logps/rejected": -1.3984375, |
|
"loss": 0.7206, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.2109375, |
|
"rewards/margins": 0.1865234375, |
|
"rewards/rejected": -1.3984375, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3138075313807531, |
|
"grad_norm": 1.2388606071472168, |
|
"learning_rate": 8.216882031596096e-07, |
|
"logits/chosen": 0.416015625, |
|
"logits/rejected": 0.47265625, |
|
"logps/chosen": -1.1640625, |
|
"logps/rejected": -1.4765625, |
|
"loss": 0.7093, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": -1.1640625, |
|
"rewards/margins": 0.306640625, |
|
"rewards/rejected": -1.4765625, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33472803347280333, |
|
"grad_norm": 1.259517788887024, |
|
"learning_rate": 7.944521687092142e-07, |
|
"logits/chosen": 0.41796875, |
|
"logits/rejected": 0.546875, |
|
"logps/chosen": -1.1484375, |
|
"logps/rejected": -1.3828125, |
|
"loss": 0.7045, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.1484375, |
|
"rewards/margins": 0.2314453125, |
|
"rewards/rejected": -1.3828125, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.35564853556485354, |
|
"grad_norm": 1.333101749420166, |
|
"learning_rate": 7.658067497518772e-07, |
|
"logits/chosen": 0.34375, |
|
"logits/rejected": 0.462890625, |
|
"logps/chosen": -1.15625, |
|
"logps/rejected": -1.4375, |
|
"loss": 0.7071, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -1.15625, |
|
"rewards/margins": 0.279296875, |
|
"rewards/rejected": -1.4375, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37656903765690375, |
|
"grad_norm": 1.4813274145126343, |
|
"learning_rate": 7.358890565349105e-07, |
|
"logits/chosen": 0.39453125, |
|
"logits/rejected": 0.46484375, |
|
"logps/chosen": -1.1875, |
|
"logps/rejected": -1.4921875, |
|
"loss": 0.7057, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.1875, |
|
"rewards/margins": 0.302734375, |
|
"rewards/rejected": -1.4921875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39748953974895396, |
|
"grad_norm": 2.0108981132507324, |
|
"learning_rate": 7.048422889997115e-07, |
|
"logits/chosen": 0.287109375, |
|
"logits/rejected": 0.392578125, |
|
"logps/chosen": -1.2265625, |
|
"logps/rejected": -1.59375, |
|
"loss": 0.6979, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.2265625, |
|
"rewards/margins": 0.369140625, |
|
"rewards/rejected": -1.59375, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.41841004184100417, |
|
"grad_norm": 1.4848254919052124, |
|
"learning_rate": 6.72815051360494e-07, |
|
"logits/chosen": 0.384765625, |
|
"logits/rejected": 0.5, |
|
"logps/chosen": -1.1796875, |
|
"logps/rejected": -1.4765625, |
|
"loss": 0.6763, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.1796875, |
|
"rewards/margins": 0.29296875, |
|
"rewards/rejected": -1.4765625, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4393305439330544, |
|
"grad_norm": 1.0059075355529785, |
|
"learning_rate": 6.399606408156687e-07, |
|
"logits/chosen": 0.41015625, |
|
"logits/rejected": 0.466796875, |
|
"logps/chosen": -1.2265625, |
|
"logps/rejected": -1.5078125, |
|
"loss": 0.6888, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": -1.2265625, |
|
"rewards/margins": 0.28125, |
|
"rewards/rejected": -1.5078125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4602510460251046, |
|
"grad_norm": 1.5360616445541382, |
|
"learning_rate": 6.064363137964225e-07, |
|
"logits/chosen": 0.435546875, |
|
"logits/rejected": 0.53515625, |
|
"logps/chosen": -1.203125, |
|
"logps/rejected": -1.5546875, |
|
"loss": 0.6654, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -1.203125, |
|
"rewards/margins": 0.345703125, |
|
"rewards/rejected": -1.5546875, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4811715481171548, |
|
"grad_norm": 5.390141487121582, |
|
"learning_rate": 5.724025332645793e-07, |
|
"logits/chosen": 0.458984375, |
|
"logits/rejected": 0.53515625, |
|
"logps/chosen": -1.28125, |
|
"logps/rejected": -1.4921875, |
|
"loss": 0.6899, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.28125, |
|
"rewards/margins": 0.216796875, |
|
"rewards/rejected": -1.4921875, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"grad_norm": 1.5487908124923706, |
|
"learning_rate": 5.380222006625179e-07, |
|
"logits/chosen": 0.38671875, |
|
"logits/rejected": 0.515625, |
|
"logps/chosen": -1.234375, |
|
"logps/rejected": -1.6484375, |
|
"loss": 0.6761, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.234375, |
|
"rewards/margins": 0.4140625, |
|
"rewards/rejected": -1.6484375, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5230125523012552, |
|
"grad_norm": 1.3993537425994873, |
|
"learning_rate": 5.034598761913916e-07, |
|
"logits/chosen": 0.38671875, |
|
"logits/rejected": 0.52734375, |
|
"logps/chosen": -1.2578125, |
|
"logps/rejected": -1.625, |
|
"loss": 0.677, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.2578125, |
|
"rewards/margins": 0.373046875, |
|
"rewards/rejected": -1.625, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5439330543933054, |
|
"grad_norm": 1.6489665508270264, |
|
"learning_rate": 4.688809911497609e-07, |
|
"logits/chosen": 0.4140625, |
|
"logits/rejected": 0.5625, |
|
"logps/chosen": -1.2890625, |
|
"logps/rejected": -1.671875, |
|
"loss": 0.6837, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.2890625, |
|
"rewards/margins": 0.3828125, |
|
"rewards/rejected": -1.671875, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5648535564853556, |
|
"grad_norm": 1.4435418844223022, |
|
"learning_rate": 4.344510561027498e-07, |
|
"logits/chosen": 0.388671875, |
|
"logits/rejected": 0.4921875, |
|
"logps/chosen": -1.3203125, |
|
"logps/rejected": -1.703125, |
|
"loss": 0.6721, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.3203125, |
|
"rewards/margins": 0.38671875, |
|
"rewards/rejected": -1.703125, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5857740585774058, |
|
"grad_norm": 1.013711929321289, |
|
"learning_rate": 4.003348686717949e-07, |
|
"logits/chosen": 0.439453125, |
|
"logits/rejected": 0.5625, |
|
"logps/chosen": -1.296875, |
|
"logps/rejected": -1.6484375, |
|
"loss": 0.6779, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -1.296875, |
|
"rewards/margins": 0.35546875, |
|
"rewards/rejected": -1.6484375, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.606694560669456, |
|
"grad_norm": 1.2121633291244507, |
|
"learning_rate": 3.666957247368757e-07, |
|
"logits/chosen": 0.3984375, |
|
"logits/rejected": 0.5703125, |
|
"logps/chosen": -1.2890625, |
|
"logps/rejected": -1.65625, |
|
"loss": 0.6906, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2890625, |
|
"rewards/margins": 0.365234375, |
|
"rewards/rejected": -1.65625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6276150627615062, |
|
"grad_norm": 1.591893196105957, |
|
"learning_rate": 3.3369463682677234e-07, |
|
"logits/chosen": 0.3984375, |
|
"logits/rejected": 0.55078125, |
|
"logps/chosen": -1.3671875, |
|
"logps/rejected": -1.6796875, |
|
"loss": 0.6659, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -1.3671875, |
|
"rewards/margins": 0.3125, |
|
"rewards/rejected": -1.6796875, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6485355648535565, |
|
"grad_norm": 1.368701696395874, |
|
"learning_rate": 3.014895634385014e-07, |
|
"logits/chosen": 0.431640625, |
|
"logits/rejected": 0.54296875, |
|
"logps/chosen": -1.3671875, |
|
"logps/rejected": -1.734375, |
|
"loss": 0.6656, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.3671875, |
|
"rewards/margins": 0.3671875, |
|
"rewards/rejected": -1.734375, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6694560669456067, |
|
"grad_norm": 1.4159173965454102, |
|
"learning_rate": 2.7023465297476424e-07, |
|
"logits/chosen": 0.462890625, |
|
"logits/rejected": 0.6171875, |
|
"logps/chosen": -1.390625, |
|
"logps/rejected": -1.75, |
|
"loss": 0.6505, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.390625, |
|
"rewards/margins": 0.361328125, |
|
"rewards/rejected": -1.75, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6903765690376569, |
|
"grad_norm": 1.2280809879302979, |
|
"learning_rate": 2.4007950591826913e-07, |
|
"logits/chosen": 0.42578125, |
|
"logits/rejected": 0.53515625, |
|
"logps/chosen": -1.3203125, |
|
"logps/rejected": -1.7421875, |
|
"loss": 0.6651, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.3203125, |
|
"rewards/margins": 0.419921875, |
|
"rewards/rejected": -1.7421875, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7112970711297071, |
|
"grad_norm": 1.3542604446411133, |
|
"learning_rate": 2.1116845877450805e-07, |
|
"logits/chosen": 0.4765625, |
|
"logits/rejected": 0.61328125, |
|
"logps/chosen": -1.359375, |
|
"logps/rejected": -1.7578125, |
|
"loss": 0.667, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.359375, |
|
"rewards/margins": 0.400390625, |
|
"rewards/rejected": -1.7578125, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7322175732217573, |
|
"grad_norm": 1.5316176414489746, |
|
"learning_rate": 1.8363989321036577e-07, |
|
"logits/chosen": 0.330078125, |
|
"logits/rejected": 0.49609375, |
|
"logps/chosen": -1.4140625, |
|
"logps/rejected": -1.7109375, |
|
"loss": 0.6679, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.4140625, |
|
"rewards/margins": 0.294921875, |
|
"rewards/rejected": -1.7109375, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7531380753138075, |
|
"grad_norm": 1.7469818592071533, |
|
"learning_rate": 1.5762557369534708e-07, |
|
"logits/chosen": 0.48046875, |
|
"logits/rejected": 0.58203125, |
|
"logps/chosen": -1.421875, |
|
"logps/rejected": -1.75, |
|
"loss": 0.6766, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": -1.421875, |
|
"rewards/margins": 0.33203125, |
|
"rewards/rejected": -1.75, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7740585774058577, |
|
"grad_norm": 1.427762746810913, |
|
"learning_rate": 1.332500168157748e-07, |
|
"logits/chosen": 0.41015625, |
|
"logits/rejected": 0.5234375, |
|
"logps/chosen": -1.3671875, |
|
"logps/rejected": -1.7890625, |
|
"loss": 0.668, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -1.3671875, |
|
"rewards/margins": 0.421875, |
|
"rewards/rejected": -1.7890625, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7949790794979079, |
|
"grad_norm": 1.8916194438934326, |
|
"learning_rate": 1.1062989528071681e-07, |
|
"logits/chosen": 0.462890625, |
|
"logits/rejected": 0.51171875, |
|
"logps/chosen": -1.3671875, |
|
"logps/rejected": -1.8125, |
|
"loss": 0.6799, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -1.3671875, |
|
"rewards/margins": 0.447265625, |
|
"rewards/rejected": -1.8125, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8158995815899581, |
|
"grad_norm": 2.0285046100616455, |
|
"learning_rate": 8.987347947234192e-08, |
|
"logits/chosen": 0.478515625, |
|
"logits/rejected": 0.625, |
|
"logps/chosen": -1.375, |
|
"logps/rejected": -1.859375, |
|
"loss": 0.6716, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -1.375, |
|
"rewards/margins": 0.482421875, |
|
"rewards/rejected": -1.859375, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 1.5867072343826294, |
|
"learning_rate": 7.108011921370727e-08, |
|
"logits/chosen": 0.44140625, |
|
"logits/rejected": 0.58984375, |
|
"logps/chosen": -1.453125, |
|
"logps/rejected": -1.8203125, |
|
"loss": 0.6794, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.453125, |
|
"rewards/margins": 0.36328125, |
|
"rewards/rejected": -1.8203125, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8577405857740585, |
|
"grad_norm": 1.4711084365844727, |
|
"learning_rate": 5.433976823447262e-08, |
|
"logits/chosen": 0.5, |
|
"logits/rejected": 0.60546875, |
|
"logps/chosen": -1.421875, |
|
"logps/rejected": -1.8515625, |
|
"loss": 0.6766, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -1.421875, |
|
"rewards/margins": 0.4296875, |
|
"rewards/rejected": -1.8515625, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8786610878661087, |
|
"grad_norm": 1.7933145761489868, |
|
"learning_rate": 3.973255361067346e-08, |
|
"logits/chosen": 0.333984375, |
|
"logits/rejected": 0.470703125, |
|
"logps/chosen": -1.4765625, |
|
"logps/rejected": -1.7578125, |
|
"loss": 0.6645, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.4765625, |
|
"rewards/margins": 0.28125, |
|
"rewards/rejected": -1.7578125, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.899581589958159, |
|
"grad_norm": 1.1009119749069214, |
|
"learning_rate": 2.732839223940914e-08, |
|
"logits/chosen": 0.34375, |
|
"logits/rejected": 0.455078125, |
|
"logps/chosen": -1.4375, |
|
"logps/rejected": -1.8359375, |
|
"loss": 0.6838, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4375, |
|
"rewards/margins": 0.400390625, |
|
"rewards/rejected": -1.8359375, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9205020920502092, |
|
"grad_norm": 3.219727039337158, |
|
"learning_rate": 1.7186656184179473e-08, |
|
"logits/chosen": 0.40625, |
|
"logits/rejected": 0.515625, |
|
"logps/chosen": -1.34375, |
|
"logps/rejected": -1.7421875, |
|
"loss": 0.7008, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -1.34375, |
|
"rewards/margins": 0.39453125, |
|
"rewards/rejected": -1.7421875, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9414225941422594, |
|
"grad_norm": 1.8418503999710083, |
|
"learning_rate": 9.355888492680153e-09, |
|
"logits/chosen": 0.435546875, |
|
"logits/rejected": 0.56640625, |
|
"logps/chosen": -1.4375, |
|
"logps/rejected": -1.828125, |
|
"loss": 0.6758, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.4375, |
|
"rewards/margins": 0.396484375, |
|
"rewards/rejected": -1.828125, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9623430962343096, |
|
"grad_norm": 1.1154942512512207, |
|
"learning_rate": 3.873570847285012e-09, |
|
"logits/chosen": 0.396484375, |
|
"logits/rejected": 0.5390625, |
|
"logps/chosen": -1.4140625, |
|
"logps/rejected": -1.890625, |
|
"loss": 0.6633, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.4140625, |
|
"rewards/margins": 0.474609375, |
|
"rewards/rejected": -1.890625, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9832635983263598, |
|
"grad_norm": 1.7730712890625, |
|
"learning_rate": 7.65944160348142e-10, |
|
"logits/chosen": 0.41796875, |
|
"logits/rejected": 0.58203125, |
|
"logps/chosen": -1.4296875, |
|
"logps/rejected": -1.796875, |
|
"loss": 0.6715, |
|
"nll_loss": 0.0, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.4296875, |
|
"rewards/margins": 0.36328125, |
|
"rewards/rejected": -1.796875, |
|
"step": 470 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 478, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|