ShenaoZhang's picture
Model save
db17a3a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9968652037617555,
"eval_steps": 500,
"global_step": 159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.125e-08,
"logits/chosen": -2.6282713413238525,
"logits/rejected": -2.5908496379852295,
"logps/chosen": -197.16619873046875,
"logps/pi_response": -79.30451965332031,
"logps/ref_response": -79.30451965332031,
"logps/rejected": -296.1330261230469,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.6781411170959473,
"logits/rejected": -2.6469974517822266,
"logps/chosen": -346.4255065917969,
"logps/pi_response": -118.85271453857422,
"logps/ref_response": -117.75043487548828,
"logps/rejected": -370.81414794921875,
"loss": 0.6897,
"rewards/accuracies": 0.4722222089767456,
"rewards/chosen": -0.02221786230802536,
"rewards/margins": 0.002993849106132984,
"rewards/rejected": -0.02521171048283577,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -2.6510815620422363,
"logits/rejected": -2.643944501876831,
"logps/chosen": -367.3860168457031,
"logps/pi_response": -144.99758911132812,
"logps/ref_response": -130.18136596679688,
"logps/rejected": -418.2279357910156,
"loss": 0.6531,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.34063997864723206,
"rewards/margins": 0.11587037891149521,
"rewards/rejected": -0.4565103054046631,
"step": 20
},
{
"epoch": 0.19,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -2.4943203926086426,
"logits/rejected": -2.419273853302002,
"logps/chosen": -414.4820251464844,
"logps/pi_response": -131.7759552001953,
"logps/ref_response": -112.0391616821289,
"logps/rejected": -451.5794372558594,
"loss": 0.6399,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.7901811003684998,
"rewards/margins": 0.368924617767334,
"rewards/rejected": -1.1591057777404785,
"step": 30
},
{
"epoch": 0.25,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -2.362710952758789,
"logits/rejected": -2.3332810401916504,
"logps/chosen": -440.0569763183594,
"logps/pi_response": -165.1667938232422,
"logps/ref_response": -109.482421875,
"logps/rejected": -516.9385986328125,
"loss": 0.6276,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -1.1469193696975708,
"rewards/margins": 0.32388854026794434,
"rewards/rejected": -1.4708077907562256,
"step": 40
},
{
"epoch": 0.31,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -2.3507821559906006,
"logits/rejected": -2.292423725128174,
"logps/chosen": -402.9019775390625,
"logps/pi_response": -151.48883056640625,
"logps/ref_response": -116.75199127197266,
"logps/rejected": -471.33294677734375,
"loss": 0.6044,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.7677600979804993,
"rewards/margins": 0.36667126417160034,
"rewards/rejected": -1.1344313621520996,
"step": 50
},
{
"epoch": 0.38,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -2.2195403575897217,
"logits/rejected": -2.180490255355835,
"logps/chosen": -431.85101318359375,
"logps/pi_response": -163.049072265625,
"logps/ref_response": -107.99072265625,
"logps/rejected": -479.72698974609375,
"loss": 0.6012,
"rewards/accuracies": 0.625,
"rewards/chosen": -1.1244434118270874,
"rewards/margins": 0.3633125424385071,
"rewards/rejected": -1.4877557754516602,
"step": 60
},
{
"epoch": 0.44,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -2.2236759662628174,
"logits/rejected": -2.145097255706787,
"logps/chosen": -418.8814392089844,
"logps/pi_response": -171.4234619140625,
"logps/ref_response": -112.27213287353516,
"logps/rejected": -477.97998046875,
"loss": 0.5864,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -1.0089619159698486,
"rewards/margins": 0.407695472240448,
"rewards/rejected": -1.4166573286056519,
"step": 70
},
{
"epoch": 0.5,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -2.168750047683716,
"logits/rejected": -2.08182954788208,
"logps/chosen": -450.20745849609375,
"logps/pi_response": -176.2133331298828,
"logps/ref_response": -100.95310974121094,
"logps/rejected": -498.7420349121094,
"loss": 0.5775,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -1.27684485912323,
"rewards/margins": 0.3819858431816101,
"rewards/rejected": -1.6588308811187744,
"step": 80
},
{
"epoch": 0.56,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -2.2347817420959473,
"logits/rejected": -2.1916308403015137,
"logps/chosen": -392.2938537597656,
"logps/pi_response": -164.12486267089844,
"logps/ref_response": -100.01224517822266,
"logps/rejected": -519.1055297851562,
"loss": 0.5703,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -1.00107741355896,
"rewards/margins": 0.6102837920188904,
"rewards/rejected": -1.6113611459732056,
"step": 90
},
{
"epoch": 0.63,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -2.1057143211364746,
"logits/rejected": -2.0601727962493896,
"logps/chosen": -470.12554931640625,
"logps/pi_response": -193.02027893066406,
"logps/ref_response": -112.04179382324219,
"logps/rejected": -601.5619506835938,
"loss": 0.5477,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -1.430586814880371,
"rewards/margins": 0.5693450570106506,
"rewards/rejected": -1.9999316930770874,
"step": 100
},
{
"epoch": 0.69,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -2.1225686073303223,
"logits/rejected": -2.095275402069092,
"logps/chosen": -470.36651611328125,
"logps/pi_response": -188.7615509033203,
"logps/ref_response": -100.41735076904297,
"logps/rejected": -545.0133056640625,
"loss": 0.5734,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -1.5548226833343506,
"rewards/margins": 0.403084933757782,
"rewards/rejected": -1.9579073190689087,
"step": 110
},
{
"epoch": 0.75,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -2.1088550090789795,
"logits/rejected": -2.0325331687927246,
"logps/chosen": -483.610595703125,
"logps/pi_response": -217.5375518798828,
"logps/ref_response": -129.7823944091797,
"logps/rejected": -566.143798828125,
"loss": 0.542,
"rewards/accuracies": 0.71875,
"rewards/chosen": -1.4614275693893433,
"rewards/margins": 0.5255736112594604,
"rewards/rejected": -1.9870010614395142,
"step": 120
},
{
"epoch": 0.82,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -2.1483001708984375,
"logits/rejected": -2.087111234664917,
"logps/chosen": -477.526123046875,
"logps/pi_response": -191.5972900390625,
"logps/ref_response": -111.270751953125,
"logps/rejected": -556.4031982421875,
"loss": 0.561,
"rewards/accuracies": 0.75,
"rewards/chosen": -1.4053503274917603,
"rewards/margins": 0.597406268119812,
"rewards/rejected": -2.0027565956115723,
"step": 130
},
{
"epoch": 0.88,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -2.1172375679016113,
"logits/rejected": -2.0181617736816406,
"logps/chosen": -475.329345703125,
"logps/pi_response": -208.83413696289062,
"logps/ref_response": -119.419189453125,
"logps/rejected": -562.1836547851562,
"loss": 0.5439,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -1.4041850566864014,
"rewards/margins": 0.5541211366653442,
"rewards/rejected": -1.9583065509796143,
"step": 140
},
{
"epoch": 0.94,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -2.0521388053894043,
"logits/rejected": -1.9707081317901611,
"logps/chosen": -441.0538635253906,
"logps/pi_response": -184.4268798828125,
"logps/ref_response": -101.12681579589844,
"logps/rejected": -518.763671875,
"loss": 0.5564,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -1.354651689529419,
"rewards/margins": 0.5879614353179932,
"rewards/rejected": -1.9426130056381226,
"step": 150
},
{
"epoch": 1.0,
"step": 159,
"total_flos": 0.0,
"train_loss": 0.5899531646344647,
"train_runtime": 4183.7712,
"train_samples_per_second": 4.871,
"train_steps_per_second": 0.038
}
],
"logging_steps": 10,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}