|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.06, |
|
"eval_steps": 500, |
|
"global_step": 30, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002, |
|
"grad_norm": 60.206947326660156, |
|
"learning_rate": 4.8333333333333334e-05, |
|
"logits/chosen": -1.116684913635254, |
|
"logits/rejected": -1.2412071228027344, |
|
"logps/chosen": -80.99737548828125, |
|
"logps/rejected": -208.84976196289062, |
|
"loss": 0.8252, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 19.664356231689453, |
|
"rewards/margins": 11.318536758422852, |
|
"rewards/rejected": 8.345821380615234, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004, |
|
"grad_norm": 18.464937210083008, |
|
"learning_rate": 4.666666666666667e-05, |
|
"logits/chosen": -1.2304766178131104, |
|
"logits/rejected": -1.1085100173950195, |
|
"logps/chosen": -44.50049591064453, |
|
"logps/rejected": -95.98702239990234, |
|
"loss": 1.0253, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 9.29911994934082, |
|
"rewards/margins": 5.345921516418457, |
|
"rewards/rejected": 3.9531972408294678, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.006, |
|
"grad_norm": 21.883440017700195, |
|
"learning_rate": 4.5e-05, |
|
"logits/chosen": -1.190551519393921, |
|
"logits/rejected": -0.9011133313179016, |
|
"logps/chosen": -102.20394897460938, |
|
"logps/rejected": -98.943603515625, |
|
"loss": 0.3271, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 14.595328330993652, |
|
"rewards/margins": 10.861374855041504, |
|
"rewards/rejected": 3.7339534759521484, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 6.222319643711671e-05, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"logits/chosen": -0.4864046573638916, |
|
"logits/rejected": -0.9188252091407776, |
|
"logps/chosen": -127.2591781616211, |
|
"logps/rejected": -402.8990783691406, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.617298126220703, |
|
"rewards/margins": 22.434566497802734, |
|
"rewards/rejected": -3.817267656326294, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.3858901858329773, |
|
"learning_rate": 4.166666666666667e-05, |
|
"logits/chosen": -1.0422379970550537, |
|
"logits/rejected": -0.8618497848510742, |
|
"logps/chosen": -73.0203857421875, |
|
"logps/rejected": -301.347412109375, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.020734786987305, |
|
"rewards/margins": 27.691850662231445, |
|
"rewards/rejected": -2.6711173057556152, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.012, |
|
"grad_norm": 0.0033887920435518026, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": -1.2303388118743896, |
|
"logits/rejected": -0.7831270694732666, |
|
"logps/chosen": -85.19882202148438, |
|
"logps/rejected": -286.6585693359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 22.92217254638672, |
|
"rewards/margins": 28.057987213134766, |
|
"rewards/rejected": -5.135812282562256, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.014, |
|
"grad_norm": 0.01970483362674713, |
|
"learning_rate": 3.8333333333333334e-05, |
|
"logits/chosen": -1.266550064086914, |
|
"logits/rejected": -0.7790937423706055, |
|
"logps/chosen": -104.19335174560547, |
|
"logps/rejected": -176.42501831054688, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.132671356201172, |
|
"rewards/margins": 16.39078140258789, |
|
"rewards/rejected": -4.2581095695495605, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 207.87570190429688, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"logits/chosen": -1.1942288875579834, |
|
"logits/rejected": -0.5387299060821533, |
|
"logps/chosen": -330.1429748535156, |
|
"logps/rejected": -239.27011108398438, |
|
"loss": 3.0167, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 9.409097671508789, |
|
"rewards/margins": 14.1237154006958, |
|
"rewards/rejected": -4.714616298675537, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.018, |
|
"grad_norm": 0.0003472985699772835, |
|
"learning_rate": 3.5e-05, |
|
"logits/chosen": -1.2635735273361206, |
|
"logits/rejected": -0.413591206073761, |
|
"logps/chosen": -185.6806640625, |
|
"logps/rejected": -166.2919921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 24.54703712463379, |
|
"rewards/margins": 25.826885223388672, |
|
"rewards/rejected": -1.279848337173462, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.05313471704721451, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"logits/chosen": -1.0146455764770508, |
|
"logits/rejected": -0.8916444182395935, |
|
"logps/chosen": -209.36700439453125, |
|
"logps/rejected": -468.6343994140625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 22.948640823364258, |
|
"rewards/margins": 28.48465919494629, |
|
"rewards/rejected": -5.536019325256348, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.022, |
|
"grad_norm": 142.11721801757812, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"logits/chosen": -0.9070144891738892, |
|
"logits/rejected": -0.6460937261581421, |
|
"logps/chosen": -159.49267578125, |
|
"logps/rejected": -283.4989929199219, |
|
"loss": 1.5215, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 18.536209106445312, |
|
"rewards/margins": 17.37179183959961, |
|
"rewards/rejected": 1.1644160747528076, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.024, |
|
"grad_norm": 2.082591663565836e-07, |
|
"learning_rate": 3e-05, |
|
"logits/chosen": -1.2319185733795166, |
|
"logits/rejected": -0.69745934009552, |
|
"logps/chosen": -118.86239624023438, |
|
"logps/rejected": -291.8245544433594, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.314111709594727, |
|
"rewards/margins": 24.709243774414062, |
|
"rewards/rejected": -9.395132064819336, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.026, |
|
"grad_norm": 0.2945432960987091, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"logits/chosen": -1.1761776208877563, |
|
"logits/rejected": -0.7650622725486755, |
|
"logps/chosen": -149.94891357421875, |
|
"logps/rejected": -175.96791076660156, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.455711364746094, |
|
"rewards/margins": 17.48712158203125, |
|
"rewards/rejected": -4.031410217285156, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.028, |
|
"grad_norm": 1.873824954032898, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"logits/chosen": -0.8233789801597595, |
|
"logits/rejected": -1.152618646621704, |
|
"logps/chosen": -146.26565551757812, |
|
"logps/rejected": -507.0345458984375, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.955230712890625, |
|
"rewards/margins": 25.935028076171875, |
|
"rewards/rejected": -4.979795932769775, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 28.382314682006836, |
|
"learning_rate": 2.5e-05, |
|
"logits/chosen": -1.1895701885223389, |
|
"logits/rejected": -0.8884562849998474, |
|
"logps/chosen": -93.35548400878906, |
|
"logps/rejected": -189.5703125, |
|
"loss": 0.1604, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.567254066467285, |
|
"rewards/margins": 15.98021411895752, |
|
"rewards/rejected": -4.412960052490234, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 0.0003264884580858052, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"logits/chosen": -1.0162668228149414, |
|
"logits/rejected": -0.9473219513893127, |
|
"logps/chosen": -269.1289367675781, |
|
"logps/rejected": -664.7567138671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 15.435200691223145, |
|
"rewards/margins": 40.45425796508789, |
|
"rewards/rejected": -25.01905632019043, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.034, |
|
"grad_norm": 39.93281936645508, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"logits/chosen": -1.261150598526001, |
|
"logits/rejected": -0.5414608120918274, |
|
"logps/chosen": -169.63702392578125, |
|
"logps/rejected": -218.22845458984375, |
|
"loss": 2.1803, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 19.81353759765625, |
|
"rewards/margins": 21.26726722717285, |
|
"rewards/rejected": -1.4537286758422852, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.036, |
|
"grad_norm": 0.3639410138130188, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": -1.1032836437225342, |
|
"logits/rejected": -0.42093324661254883, |
|
"logps/chosen": -250.34255981445312, |
|
"logps/rejected": -339.02777099609375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 29.49982452392578, |
|
"rewards/margins": 29.380714416503906, |
|
"rewards/rejected": 0.11910903453826904, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.038, |
|
"grad_norm": 91.46900939941406, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"logits/chosen": -1.382272720336914, |
|
"logits/rejected": -1.3159658908843994, |
|
"logps/chosen": -157.1051788330078, |
|
"logps/rejected": -93.73989868164062, |
|
"loss": 4.247, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": 1.4054617881774902, |
|
"rewards/margins": -1.071337103843689, |
|
"rewards/rejected": 2.4767990112304688, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.07625256478786469, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"logits/chosen": -1.2797192335128784, |
|
"logits/rejected": -0.41436293721199036, |
|
"logps/chosen": -233.0648956298828, |
|
"logps/rejected": -280.9295959472656, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 14.561626434326172, |
|
"rewards/margins": 24.783857345581055, |
|
"rewards/rejected": -10.222232818603516, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.042, |
|
"grad_norm": 0.0006307783187367022, |
|
"learning_rate": 1.5e-05, |
|
"logits/chosen": -1.200671672821045, |
|
"logits/rejected": -0.8377301692962646, |
|
"logps/chosen": -181.38812255859375, |
|
"logps/rejected": -400.5192565917969, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 20.238813400268555, |
|
"rewards/margins": 29.048757553100586, |
|
"rewards/rejected": -8.809943199157715, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.044, |
|
"grad_norm": 3.4681459510466084e-05, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"logits/chosen": -1.0809839963912964, |
|
"logits/rejected": -1.0748283863067627, |
|
"logps/chosen": -210.24159240722656, |
|
"logps/rejected": -478.85113525390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 14.9466552734375, |
|
"rewards/margins": 30.102947235107422, |
|
"rewards/rejected": -15.156291007995605, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.046, |
|
"grad_norm": 1.9936389435315505e-05, |
|
"learning_rate": 1.1666666666666668e-05, |
|
"logits/chosen": -1.1689856052398682, |
|
"logits/rejected": -0.40747758746147156, |
|
"logps/chosen": -278.3692932128906, |
|
"logps/rejected": -260.0756530761719, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 22.959720611572266, |
|
"rewards/margins": 28.647663116455078, |
|
"rewards/rejected": -5.687943458557129, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.048, |
|
"grad_norm": 2.3284033886739053e-05, |
|
"learning_rate": 1e-05, |
|
"logits/chosen": -0.9488776922225952, |
|
"logits/rejected": -0.9771822094917297, |
|
"logps/chosen": -107.5537109375, |
|
"logps/rejected": -420.6499328613281, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.997261047363281, |
|
"rewards/margins": 30.08607292175293, |
|
"rewards/rejected": -16.08881187438965, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.00019201346731279045, |
|
"learning_rate": 8.333333333333334e-06, |
|
"logits/chosen": -1.0477547645568848, |
|
"logits/rejected": -0.8248160481452942, |
|
"logps/chosen": -200.435546875, |
|
"logps/rejected": -410.39874267578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 12.744895935058594, |
|
"rewards/margins": 28.151798248291016, |
|
"rewards/rejected": -15.406902313232422, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.052, |
|
"grad_norm": 9.264203981729224e-06, |
|
"learning_rate": 6.666666666666667e-06, |
|
"logits/chosen": -0.795575737953186, |
|
"logits/rejected": -0.8969402313232422, |
|
"logps/chosen": -187.31686401367188, |
|
"logps/rejected": -461.4927673339844, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 17.49878692626953, |
|
"rewards/margins": 30.602787017822266, |
|
"rewards/rejected": -13.10400104522705, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.054, |
|
"grad_norm": 0.0012664368841797113, |
|
"learning_rate": 5e-06, |
|
"logits/chosen": -1.146821141242981, |
|
"logits/rejected": -1.307145118713379, |
|
"logps/chosen": -82.47093963623047, |
|
"logps/rejected": -296.2177734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 10.118415832519531, |
|
"rewards/margins": 22.095069885253906, |
|
"rewards/rejected": -11.976654052734375, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.056, |
|
"grad_norm": 24.4777889251709, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -1.1492491960525513, |
|
"logits/rejected": -0.9397568702697754, |
|
"logps/chosen": -207.41566467285156, |
|
"logps/rejected": -408.0826110839844, |
|
"loss": 0.219, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 14.095235824584961, |
|
"rewards/margins": 25.648344039916992, |
|
"rewards/rejected": -11.553108215332031, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.058, |
|
"grad_norm": 0.1801503300666809, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"logits/chosen": -1.2135852575302124, |
|
"logits/rejected": -0.22595801949501038, |
|
"logps/chosen": -190.64984130859375, |
|
"logps/rejected": -219.69593811035156, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 9.875904083251953, |
|
"rewards/margins": 18.048542022705078, |
|
"rewards/rejected": -8.172636985778809, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.11778837442398071, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.8404010534286499, |
|
"logits/rejected": -0.7674828171730042, |
|
"logps/chosen": -144.8819580078125, |
|
"logps/rejected": -553.426513671875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 18.967082977294922, |
|
"rewards/margins": 35.870948791503906, |
|
"rewards/rejected": -16.903865814208984, |
|
"step": 30 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 30, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|