|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9990817263544536, |
|
"eval_steps": 100, |
|
"global_step": 204, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.3809523809523811e-07, |
|
"logits/chosen": -2.5948691368103027, |
|
"logits/rejected": -2.452101707458496, |
|
"logps/chosen": -288.6771240234375, |
|
"logps/rejected": -270.8803405761719, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.380952380952381e-06, |
|
"logits/chosen": -2.4060661792755127, |
|
"logits/rejected": -2.3895983695983887, |
|
"logps/chosen": -248.78944396972656, |
|
"logps/rejected": -272.0387878417969, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.5138888955116272, |
|
"rewards/chosen": -0.012765132822096348, |
|
"rewards/margins": 0.0059960586950182915, |
|
"rewards/rejected": -0.01876119151711464, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.761904761904762e-06, |
|
"logits/chosen": -2.352856159210205, |
|
"logits/rejected": -2.2108333110809326, |
|
"logps/chosen": -330.4653625488281, |
|
"logps/rejected": -309.5657043457031, |
|
"loss": 0.6543, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.05031045526266098, |
|
"rewards/margins": 0.09622685611248016, |
|
"rewards/rejected": -0.14653730392456055, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.970219740227693e-06, |
|
"logits/chosen": -1.890295386314392, |
|
"logits/rejected": -1.7602672576904297, |
|
"logps/chosen": -304.2027282714844, |
|
"logps/rejected": -310.0560607910156, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.05764678865671158, |
|
"rewards/margins": 0.20967629551887512, |
|
"rewards/rejected": -0.2673230767250061, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.868186180746792e-06, |
|
"logits/chosen": -1.6625322103500366, |
|
"logits/rejected": -1.408319115638733, |
|
"logps/chosen": -332.21380615234375, |
|
"logps/rejected": -359.38226318359375, |
|
"loss": 0.5921, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.0873696357011795, |
|
"rewards/margins": 0.3436659276485443, |
|
"rewards/rejected": -0.431035578250885, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.696530612642871e-06, |
|
"logits/chosen": -1.2149860858917236, |
|
"logits/rejected": -0.9494598507881165, |
|
"logps/chosen": -344.01007080078125, |
|
"logps/rejected": -387.1136779785156, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.3123381733894348, |
|
"rewards/margins": 0.41091781854629517, |
|
"rewards/rejected": -0.72325599193573, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.460299516441777e-06, |
|
"logits/chosen": -1.3099194765090942, |
|
"logits/rejected": -1.0444400310516357, |
|
"logps/chosen": -308.3590393066406, |
|
"logps/rejected": -356.48309326171875, |
|
"loss": 0.5712, |
|
"rewards/accuracies": 0.7093750238418579, |
|
"rewards/chosen": -0.07990916073322296, |
|
"rewards/margins": 0.42140746116638184, |
|
"rewards/rejected": -0.5013166666030884, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1664378205239085e-06, |
|
"logits/chosen": -1.1891834735870361, |
|
"logits/rejected": -0.8295331001281738, |
|
"logps/chosen": -328.58770751953125, |
|
"logps/rejected": -367.3239440917969, |
|
"loss": 0.565, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.2833566665649414, |
|
"rewards/margins": 0.43687087297439575, |
|
"rewards/rejected": -0.7202275395393372, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8235847280454626e-06, |
|
"logits/chosen": -1.118951439857483, |
|
"logits/rejected": -0.7426460385322571, |
|
"logps/chosen": -330.37640380859375, |
|
"logps/rejected": -368.56915283203125, |
|
"loss": 0.5259, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.24417515099048615, |
|
"rewards/margins": 0.4794433116912842, |
|
"rewards/rejected": -0.7236183881759644, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.441819734087963e-06, |
|
"logits/chosen": -1.1813547611236572, |
|
"logits/rejected": -0.7541359663009644, |
|
"logps/chosen": -337.6168518066406, |
|
"logps/rejected": -367.4094543457031, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11910835653543472, |
|
"rewards/margins": 0.5179867744445801, |
|
"rewards/rejected": -0.6370951533317566, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0323662998460396e-06, |
|
"logits/chosen": -1.077327013015747, |
|
"logits/rejected": -0.598007082939148, |
|
"logps/chosen": -346.1620788574219, |
|
"logps/rejected": -371.0578918457031, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2570808529853821, |
|
"rewards/margins": 0.4765000343322754, |
|
"rewards/rejected": -0.7335808873176575, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_logits/chosen": -0.9797883033752441, |
|
"eval_logits/rejected": -0.5705389380455017, |
|
"eval_logps/chosen": -348.43701171875, |
|
"eval_logps/rejected": -380.08721923828125, |
|
"eval_loss": 0.5570356845855713, |
|
"eval_rewards/accuracies": 0.7120000123977661, |
|
"eval_rewards/chosen": -0.3268292248249054, |
|
"eval_rewards/margins": 0.49906718730926514, |
|
"eval_rewards/rejected": -0.8258963227272034, |
|
"eval_runtime": 384.1665, |
|
"eval_samples_per_second": 5.206, |
|
"eval_steps_per_second": 0.651, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6072618954988867e-06, |
|
"logits/chosen": -1.0047966241836548, |
|
"logits/rejected": -0.5113744735717773, |
|
"logps/chosen": -352.7798767089844, |
|
"logps/rejected": -395.8839416503906, |
|
"loss": 0.5652, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.37514516711235046, |
|
"rewards/margins": 0.49685588479042053, |
|
"rewards/rejected": -0.872001051902771, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1790041121336223e-06, |
|
"logits/chosen": -1.1386340856552124, |
|
"logits/rejected": -0.762003481388092, |
|
"logps/chosen": -347.37567138671875, |
|
"logps/rejected": -381.8182373046875, |
|
"loss": 0.5298, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.32593947649002075, |
|
"rewards/margins": 0.47125840187072754, |
|
"rewards/rejected": -0.7971979379653931, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.760183246631777e-06, |
|
"logits/chosen": -0.9937411546707153, |
|
"logits/rejected": -0.5911135673522949, |
|
"logps/chosen": -341.10308837890625, |
|
"logps/rejected": -390.31622314453125, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.22077293694019318, |
|
"rewards/margins": 0.6057422757148743, |
|
"rewards/rejected": -0.8265151977539062, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3631121611097364e-06, |
|
"logits/chosen": -0.8524423837661743, |
|
"logits/rejected": -0.5514906048774719, |
|
"logps/chosen": -347.8735046386719, |
|
"logps/rejected": -380.4819641113281, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": -0.3039913773536682, |
|
"rewards/margins": 0.49449166655540466, |
|
"rewards/rejected": -0.7984830141067505, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.994642986290797e-07, |
|
"logits/chosen": -0.9539240002632141, |
|
"logits/rejected": -0.48703765869140625, |
|
"logps/chosen": -338.36041259765625, |
|
"logps/rejected": -382.77618408203125, |
|
"loss": 0.5449, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3446277379989624, |
|
"rewards/margins": 0.566214382648468, |
|
"rewards/rejected": -0.9108421206474304, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.799304971075383e-07, |
|
"logits/chosen": -0.9565147161483765, |
|
"logits/rejected": -0.4371515214443207, |
|
"logps/chosen": -359.60650634765625, |
|
"logps/rejected": -392.94366455078125, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.746874988079071, |
|
"rewards/chosen": -0.3234427571296692, |
|
"rewards/margins": 0.5231602787971497, |
|
"rewards/rejected": -0.8466030359268188, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1390469071538183e-07, |
|
"logits/chosen": -0.8408013582229614, |
|
"logits/rejected": -0.5169156789779663, |
|
"logps/chosen": -344.161376953125, |
|
"logps/rejected": -401.06927490234375, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2773435711860657, |
|
"rewards/margins": 0.7136318683624268, |
|
"rewards/rejected": -0.9909754991531372, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.092077387824884e-07, |
|
"logits/chosen": -1.0086092948913574, |
|
"logits/rejected": -0.6254789233207703, |
|
"logps/chosen": -348.04437255859375, |
|
"logps/rejected": -398.12921142578125, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.273074209690094, |
|
"rewards/margins": 0.5786523818969727, |
|
"rewards/rejected": -0.8517265319824219, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.185750133542168e-08, |
|
"logits/chosen": -0.9045581817626953, |
|
"logits/rejected": -0.6502401828765869, |
|
"logps/chosen": -323.7873840332031, |
|
"logps/rejected": -377.43109130859375, |
|
"loss": 0.5416, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.3004080057144165, |
|
"rewards/margins": 0.5847684741020203, |
|
"rewards/rejected": -0.8851765394210815, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.891920784984184e-09, |
|
"logits/chosen": -1.004695177078247, |
|
"logits/rejected": -0.5996636748313904, |
|
"logps/chosen": -338.21856689453125, |
|
"logps/rejected": -390.23956298828125, |
|
"loss": 0.5154, |
|
"rewards/accuracies": 0.778124988079071, |
|
"rewards/chosen": -0.263592392206192, |
|
"rewards/margins": 0.6431443691253662, |
|
"rewards/rejected": -0.9067367315292358, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_logits/chosen": -0.9595882296562195, |
|
"eval_logits/rejected": -0.5385043621063232, |
|
"eval_logps/chosen": -346.7320556640625, |
|
"eval_logps/rejected": -380.200927734375, |
|
"eval_loss": 0.5470749735832214, |
|
"eval_rewards/accuracies": 0.7139999866485596, |
|
"eval_rewards/chosen": -0.3097793161869049, |
|
"eval_rewards/margins": 0.5172543525695801, |
|
"eval_rewards/rejected": -0.8270336985588074, |
|
"eval_runtime": 384.197, |
|
"eval_samples_per_second": 5.206, |
|
"eval_steps_per_second": 0.651, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 204, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5623768936185276, |
|
"train_runtime": 9619.6269, |
|
"train_samples_per_second": 2.717, |
|
"train_steps_per_second": 0.021 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 204, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|