{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9968652037617555,
  "eval_steps": 500,
  "global_step": 159,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-08,
      "logits/chosen": -2.6282713413238525,
      "logits/rejected": -2.5908496379852295,
      "logps/chosen": -197.16619873046875,
      "logps/pi_response": -79.30451965332031,
      "logps/ref_response": -79.30451965332031,
      "logps/rejected": -296.1330261230469,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.6781411170959473,
      "logits/rejected": -2.6469974517822266,
      "logps/chosen": -346.4255065917969,
      "logps/pi_response": -118.85271453857422,
      "logps/ref_response": -117.75043487548828,
      "logps/rejected": -370.81414794921875,
      "loss": 0.6897,
      "rewards/accuracies": 0.4722222089767456,
      "rewards/chosen": -0.02221786230802536,
      "rewards/margins": 0.002993849106132984,
      "rewards/rejected": -0.02521171048283577,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.990353313429303e-07,
      "logits/chosen": -2.6510815620422363,
      "logits/rejected": -2.643944501876831,
      "logps/chosen": -367.3860168457031,
      "logps/pi_response": -144.99758911132812,
      "logps/ref_response": -130.18136596679688,
      "logps/rejected": -418.2279357910156,
      "loss": 0.6531,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.34063997864723206,
      "rewards/margins": 0.11587037891149521,
      "rewards/rejected": -0.4565103054046631,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.882681251368548e-07,
      "logits/chosen": -2.4943203926086426,
      "logits/rejected": -2.419273853302002,
      "logps/chosen": -414.4820251464844,
      "logps/pi_response": -131.7759552001953,
      "logps/ref_response": -112.0391616821289,
      "logps/rejected": -451.5794372558594,
      "loss": 0.6399,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.7901811003684998,
      "rewards/margins": 0.368924617767334,
      "rewards/rejected": -1.1591057777404785,
      "step": 30
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.6604720940421207e-07,
      "logits/chosen": -2.362710952758789,
      "logits/rejected": -2.3332810401916504,
      "logps/chosen": -440.0569763183594,
      "logps/pi_response": -165.1667938232422,
      "logps/ref_response": -109.482421875,
      "logps/rejected": -516.9385986328125,
      "loss": 0.6276,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.1469193696975708,
      "rewards/margins": 0.32388854026794434,
      "rewards/rejected": -1.4708077907562256,
      "step": 40
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.3344075855595097e-07,
      "logits/chosen": -2.3507821559906006,
      "logits/rejected": -2.292423725128174,
      "logps/chosen": -402.9019775390625,
      "logps/pi_response": -151.48883056640625,
      "logps/ref_response": -116.75199127197266,
      "logps/rejected": -471.33294677734375,
      "loss": 0.6044,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.7677600979804993,
      "rewards/margins": 0.36667126417160034,
      "rewards/rejected": -1.1344313621520996,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.920161866827889e-07,
      "logits/chosen": -2.2195403575897217,
      "logits/rejected": -2.180490255355835,
      "logps/chosen": -431.85101318359375,
      "logps/pi_response": -163.049072265625,
      "logps/ref_response": -107.99072265625,
      "logps/rejected": -479.72698974609375,
      "loss": 0.6012,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -1.1244434118270874,
      "rewards/margins": 0.3633125424385071,
      "rewards/rejected": -1.4877557754516602,
      "step": 60
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.4376480090239047e-07,
      "logits/chosen": -2.2236759662628174,
      "logits/rejected": -2.145097255706787,
      "logps/chosen": -418.8814392089844,
      "logps/pi_response": -171.4234619140625,
      "logps/ref_response": -112.27213287353516,
      "logps/rejected": -477.97998046875,
      "loss": 0.5864,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.0089619159698486,
      "rewards/margins": 0.407695472240448,
      "rewards/rejected": -1.4166573286056519,
      "step": 70
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.910060778827554e-07,
      "logits/chosen": -2.168750047683716,
      "logits/rejected": -2.08182954788208,
      "logps/chosen": -450.20745849609375,
      "logps/pi_response": -176.2133331298828,
      "logps/ref_response": -100.95310974121094,
      "logps/rejected": -498.7420349121094,
      "loss": 0.5775,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.27684485912323,
      "rewards/margins": 0.3819858431816101,
      "rewards/rejected": -1.6588308811187744,
      "step": 80
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.3627616503391812e-07,
      "logits/chosen": -2.2347817420959473,
      "logits/rejected": -2.1916308403015137,
      "logps/chosen": -392.2938537597656,
      "logps/pi_response": -164.12486267089844,
      "logps/ref_response": -100.01224517822266,
      "logps/rejected": -519.1055297851562,
      "loss": 0.5703,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.00107741355896,
      "rewards/margins": 0.6102837920188904,
      "rewards/rejected": -1.6113611459732056,
      "step": 90
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8220596619089573e-07,
      "logits/chosen": -2.1057143211364746,
      "logits/rejected": -2.0601727962493896,
      "logps/chosen": -470.12554931640625,
      "logps/pi_response": -193.02027893066406,
      "logps/ref_response": -112.04179382324219,
      "logps/rejected": -601.5619506835938,
      "loss": 0.5477,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -1.430586814880371,
      "rewards/margins": 0.5693450570106506,
      "rewards/rejected": -1.9999316930770874,
      "step": 100
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3139467229135998e-07,
      "logits/chosen": -2.1225686073303223,
      "logits/rejected": -2.095275402069092,
      "logps/chosen": -470.36651611328125,
      "logps/pi_response": -188.7615509033203,
      "logps/ref_response": -100.41735076904297,
      "logps/rejected": -545.0133056640625,
      "loss": 0.5734,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.5548226833343506,
      "rewards/margins": 0.403084933757782,
      "rewards/rejected": -1.9579073190689087,
      "step": 110
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.628481651367875e-08,
      "logits/chosen": -2.1088550090789795,
      "logits/rejected": -2.0325331687927246,
      "logps/chosen": -483.610595703125,
      "logps/pi_response": -217.5375518798828,
      "logps/ref_response": -129.7823944091797,
      "logps/rejected": -566.143798828125,
      "loss": 0.542,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -1.4614275693893433,
      "rewards/margins": 0.5255736112594604,
      "rewards/rejected": -1.9870010614395142,
      "step": 120
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.904486005914027e-08,
      "logits/chosen": -2.1483001708984375,
      "logits/rejected": -2.087111234664917,
      "logps/chosen": -477.526123046875,
      "logps/pi_response": -191.5972900390625,
      "logps/ref_response": -111.270751953125,
      "logps/rejected": -556.4031982421875,
      "loss": 0.561,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -1.4053503274917603,
      "rewards/margins": 0.597406268119812,
      "rewards/rejected": -2.0027565956115723,
      "step": 130
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.1464952759020856e-08,
      "logits/chosen": -2.1172375679016113,
      "logits/rejected": -2.0181617736816406,
      "logps/chosen": -475.329345703125,
      "logps/pi_response": -208.83413696289062,
      "logps/ref_response": -119.419189453125,
      "logps/rejected": -562.1836547851562,
      "loss": 0.5439,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.4041850566864014,
      "rewards/margins": 0.5541211366653442,
      "rewards/rejected": -1.9583065509796143,
      "step": 140
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.8708793644441086e-09,
      "logits/chosen": -2.0521388053894043,
      "logits/rejected": -1.9707081317901611,
      "logps/chosen": -441.0538635253906,
      "logps/pi_response": -184.4268798828125,
      "logps/ref_response": -101.12681579589844,
      "logps/rejected": -518.763671875,
      "loss": 0.5564,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -1.354651689529419,
      "rewards/margins": 0.5879614353179932,
      "rewards/rejected": -1.9426130056381226,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 159,
      "total_flos": 0.0,
      "train_loss": 0.5899531646344647,
      "train_runtime": 4183.7712,
      "train_samples_per_second": 4.871,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 10,
  "max_steps": 159,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}