|
{ |
|
"best_metric": 0.6825469136238098, |
|
"best_model_checkpoint": "/mnt/data/shesj/Trained/RL4CoT/DPO/Parallel_Iter2_numglueCorrect_iter2_10lang.json/checkpoint-200", |
|
"epoch": 0.050327126321087066, |
|
"eval_steps": 100, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-08, |
|
"logits/chosen": -0.7881901264190674, |
|
"logits/rejected": -0.7754368782043457, |
|
"logps/chosen": -5.556678295135498, |
|
"logps/rejected": -8.082754135131836, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": 0.0005767763941548765, |
|
"rewards/margins": -0.000614482443779707, |
|
"rewards/rejected": 0.0011912587797269225, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1e-07, |
|
"logits/chosen": -0.7774807214736938, |
|
"logits/rejected": -0.7521709203720093, |
|
"logps/chosen": -6.2856526374816895, |
|
"logps/rejected": -7.786572456359863, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0011454308405518532, |
|
"rewards/margins": 0.002339282538741827, |
|
"rewards/rejected": -0.003484714310616255, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.5e-07, |
|
"logits/chosen": -0.7695692777633667, |
|
"logits/rejected": -0.7617800831794739, |
|
"logps/chosen": -5.672076225280762, |
|
"logps/rejected": -7.90362548828125, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.004858463071286678, |
|
"rewards/margins": -0.00798516534268856, |
|
"rewards/rejected": 0.003126702504232526, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2e-07, |
|
"logits/chosen": -0.8169188499450684, |
|
"logits/rejected": -0.8234481811523438, |
|
"logps/chosen": -5.951030731201172, |
|
"logps/rejected": -7.665135383605957, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0009545508655719459, |
|
"rewards/margins": -0.0022635911591351032, |
|
"rewards/rejected": 0.00130904046818614, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -0.7988893389701843, |
|
"logits/rejected": -0.7831005454063416, |
|
"logps/chosen": -4.960128307342529, |
|
"logps/rejected": -7.793705940246582, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0017494624480605125, |
|
"rewards/margins": 0.0019936964381486177, |
|
"rewards/rejected": -0.0002442340482957661, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3e-07, |
|
"logits/chosen": -0.7896796464920044, |
|
"logits/rejected": -0.7605875730514526, |
|
"logps/chosen": -6.406218528747559, |
|
"logps/rejected": -8.445697784423828, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -0.0006792292697355151, |
|
"rewards/margins": -0.0016146342968568206, |
|
"rewards/rejected": 0.0009354048524983227, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.5e-07, |
|
"logits/chosen": -0.8104821443557739, |
|
"logits/rejected": -0.7983841896057129, |
|
"logps/chosen": -6.952303409576416, |
|
"logps/rejected": -8.65689754486084, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0003856793628074229, |
|
"rewards/margins": 0.0037465274799615145, |
|
"rewards/rejected": -0.0033608481753617525, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4e-07, |
|
"logits/chosen": -0.8198621869087219, |
|
"logits/rejected": -0.8019220232963562, |
|
"logps/chosen": -6.161223888397217, |
|
"logps/rejected": -7.956850528717041, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.001837434945628047, |
|
"rewards/margins": 0.005904150195419788, |
|
"rewards/rejected": -0.004066715482622385, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.5e-07, |
|
"logits/chosen": -0.7631333470344543, |
|
"logits/rejected": -0.7561143636703491, |
|
"logps/chosen": -5.855575084686279, |
|
"logps/rejected": -7.01950740814209, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0016343919560313225, |
|
"rewards/margins": 0.005311951506882906, |
|
"rewards/rejected": -0.0036775595508515835, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -0.7467092871665955, |
|
"logits/rejected": -0.7552592754364014, |
|
"logps/chosen": -7.219940185546875, |
|
"logps/rejected": -7.984251976013184, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0006344284047372639, |
|
"rewards/margins": 0.0040216282941401005, |
|
"rewards/rejected": -0.003387199714779854, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.5e-07, |
|
"logits/chosen": -0.8183493614196777, |
|
"logits/rejected": -0.8048542737960815, |
|
"logps/chosen": -5.986401557922363, |
|
"logps/rejected": -7.050605773925781, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0044477893970906734, |
|
"rewards/margins": 0.013396045193076134, |
|
"rewards/rejected": -0.008948257192969322, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6e-07, |
|
"logits/chosen": -0.7246443033218384, |
|
"logits/rejected": -0.7153327465057373, |
|
"logps/chosen": -6.37067985534668, |
|
"logps/rejected": -7.855441093444824, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.0028912366833537817, |
|
"rewards/margins": 0.0029723027255386114, |
|
"rewards/rejected": -0.005863540340214968, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6.5e-07, |
|
"logits/chosen": -0.7883706092834473, |
|
"logits/rejected": -0.7892045974731445, |
|
"logps/chosen": -5.0366129875183105, |
|
"logps/rejected": -6.685678005218506, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.003989654593169689, |
|
"rewards/margins": 0.0065727815963327885, |
|
"rewards/rejected": -0.002583127235993743, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7e-07, |
|
"logits/chosen": -0.7610381245613098, |
|
"logits/rejected": -0.767534613609314, |
|
"logps/chosen": -6.8763604164123535, |
|
"logps/rejected": -8.272597312927246, |
|
"loss": 0.687, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0005852003814652562, |
|
"rewards/margins": 0.012984293513000011, |
|
"rewards/rejected": -0.013569491915404797, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.5e-07, |
|
"logits/chosen": -0.7938845753669739, |
|
"logits/rejected": -0.7884698510169983, |
|
"logps/chosen": -6.220009803771973, |
|
"logps/rejected": -7.81838321685791, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.008494245819747448, |
|
"rewards/margins": 0.02036314085125923, |
|
"rewards/rejected": -0.011868895962834358, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8e-07, |
|
"logits/chosen": -0.760898232460022, |
|
"logits/rejected": -0.7529922127723694, |
|
"logps/chosen": -6.070019245147705, |
|
"logps/rejected": -8.474264144897461, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.0008083779248408973, |
|
"rewards/margins": 0.0290432907640934, |
|
"rewards/rejected": -0.029851669445633888, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.499999999999999e-07, |
|
"logits/chosen": -0.8255828619003296, |
|
"logits/rejected": -0.8029024004936218, |
|
"logps/chosen": -5.739585876464844, |
|
"logps/rejected": -8.894620895385742, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0006468339124694467, |
|
"rewards/margins": 0.039313118904829025, |
|
"rewards/rejected": -0.03866628557443619, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9e-07, |
|
"logits/chosen": -0.8031052350997925, |
|
"logits/rejected": -0.7612560987472534, |
|
"logps/chosen": -6.660666465759277, |
|
"logps/rejected": -10.91639232635498, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.008988827466964722, |
|
"rewards/margins": 0.03577885776758194, |
|
"rewards/rejected": -0.04476768523454666, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.499999999999999e-07, |
|
"logits/chosen": -0.8087406158447266, |
|
"logits/rejected": -0.7717125415802002, |
|
"logps/chosen": -6.990227699279785, |
|
"logps/rejected": -10.181965827941895, |
|
"loss": 0.6766, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.017899103462696075, |
|
"rewards/margins": 0.03709184005856514, |
|
"rewards/rejected": -0.054990947246551514, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -0.7920883297920227, |
|
"logits/rejected": -0.7615999579429626, |
|
"logps/chosen": -7.010110378265381, |
|
"logps/rejected": -8.589981079101562, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.01662164181470871, |
|
"rewards/margins": 0.04322618246078491, |
|
"rewards/rejected": -0.05984782055020332, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": -1.2141907215118408, |
|
"eval_logits/rejected": -1.2049294710159302, |
|
"eval_logps/chosen": -6.552766799926758, |
|
"eval_logps/rejected": -8.47075366973877, |
|
"eval_loss": 0.6869122385978699, |
|
"eval_rewards/accuracies": 0.5723472833633423, |
|
"eval_rewards/chosen": -0.021150289103388786, |
|
"eval_rewards/margins": 0.02127229794859886, |
|
"eval_rewards/rejected": -0.0424225889146328, |
|
"eval_runtime": 628.2123, |
|
"eval_samples_per_second": 31.588, |
|
"eval_steps_per_second": 0.495, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.999829128320873e-07, |
|
"logits/chosen": -0.7386836409568787, |
|
"logits/rejected": -0.7065194845199585, |
|
"logps/chosen": -7.015887260437012, |
|
"logps/rejected": -8.969260215759277, |
|
"loss": 0.6691, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.02153836190700531, |
|
"rewards/margins": 0.05296989530324936, |
|
"rewards/rejected": -0.07450826466083527, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.999316524962345e-07, |
|
"logits/chosen": -0.8299457430839539, |
|
"logits/rejected": -0.8254146575927734, |
|
"logps/chosen": -6.386677265167236, |
|
"logps/rejected": -8.159158706665039, |
|
"loss": 0.6626, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.019821835681796074, |
|
"rewards/margins": 0.08776978403329849, |
|
"rewards/rejected": -0.10759161412715912, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.998462224960173e-07, |
|
"logits/chosen": -0.7512461543083191, |
|
"logits/rejected": -0.7053896188735962, |
|
"logps/chosen": -7.265576362609863, |
|
"logps/rejected": -10.415300369262695, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.04903126507997513, |
|
"rewards/margins": 0.10108338296413422, |
|
"rewards/rejected": -0.15011465549468994, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.99726628670463e-07, |
|
"logits/chosen": -0.8122448921203613, |
|
"logits/rejected": -0.7945531010627747, |
|
"logps/chosen": -6.279524326324463, |
|
"logps/rejected": -8.167196273803711, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.04174378514289856, |
|
"rewards/margins": 0.07122843712568283, |
|
"rewards/rejected": -0.11297222226858139, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.995728791936505e-07, |
|
"logits/chosen": -0.7480685114860535, |
|
"logits/rejected": -0.7061656713485718, |
|
"logps/chosen": -6.890868186950684, |
|
"logps/rejected": -10.21199893951416, |
|
"loss": 0.651, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07250909507274628, |
|
"rewards/margins": 0.08439986407756805, |
|
"rewards/rejected": -0.15690895915031433, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.993849845741523e-07, |
|
"logits/chosen": -0.7156326174736023, |
|
"logits/rejected": -0.7210611701011658, |
|
"logps/chosen": -7.967876434326172, |
|
"logps/rejected": -11.226155281066895, |
|
"loss": 0.6563, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0788055881857872, |
|
"rewards/margins": 0.14646434783935547, |
|
"rewards/rejected": -0.22526994347572327, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.991629576543163e-07, |
|
"logits/chosen": -0.8028038740158081, |
|
"logits/rejected": -0.7880641222000122, |
|
"logps/chosen": -7.9293036460876465, |
|
"logps/rejected": -12.446617126464844, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.07287696748971939, |
|
"rewards/margins": 0.16767558455467224, |
|
"rewards/rejected": -0.24055257439613342, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.989068136093872e-07, |
|
"logits/chosen": -0.6804400682449341, |
|
"logits/rejected": -0.6678518056869507, |
|
"logps/chosen": -7.790997505187988, |
|
"logps/rejected": -10.521781921386719, |
|
"loss": 0.6429, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.10002864897251129, |
|
"rewards/margins": 0.16521799564361572, |
|
"rewards/rejected": -0.2652466297149658, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.986165699464705e-07, |
|
"logits/chosen": -0.7420132160186768, |
|
"logits/rejected": -0.7359737157821655, |
|
"logps/chosen": -7.692935943603516, |
|
"logps/rejected": -11.80825138092041, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.12237729877233505, |
|
"rewards/margins": 0.2165246307849884, |
|
"rewards/rejected": -0.33890193700790405, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.982922465033348e-07, |
|
"logits/chosen": -0.6650699377059937, |
|
"logits/rejected": -0.6643859148025513, |
|
"logps/chosen": -8.27735710144043, |
|
"logps/rejected": -11.08592700958252, |
|
"loss": 0.6316, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.18646416068077087, |
|
"rewards/margins": 0.17933328449726105, |
|
"rewards/rejected": -0.3657974600791931, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.979338654470567e-07, |
|
"logits/chosen": -0.6874249577522278, |
|
"logits/rejected": -0.6583540439605713, |
|
"logps/chosen": -8.387764930725098, |
|
"logps/rejected": -10.597826957702637, |
|
"loss": 0.6355, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.15453846752643585, |
|
"rewards/margins": 0.15605905652046204, |
|
"rewards/rejected": -0.3105975389480591, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.975414512725056e-07, |
|
"logits/chosen": -0.6604259610176086, |
|
"logits/rejected": -0.654133677482605, |
|
"logps/chosen": -8.139281272888184, |
|
"logps/rejected": -11.677125930786133, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.1922483593225479, |
|
"rewards/margins": 0.17367199063301086, |
|
"rewards/rejected": -0.36592036485671997, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.971150308006687e-07, |
|
"logits/chosen": -0.6868435144424438, |
|
"logits/rejected": -0.6831103563308716, |
|
"logps/chosen": -7.650822639465332, |
|
"logps/rejected": -13.520294189453125, |
|
"loss": 0.6184, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.1557883322238922, |
|
"rewards/margins": 0.309310644865036, |
|
"rewards/rejected": -0.4650990068912506, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.966546331768192e-07, |
|
"logits/chosen": -0.6930921673774719, |
|
"logits/rejected": -0.656936764717102, |
|
"logps/chosen": -7.236788749694824, |
|
"logps/rejected": -12.021242141723633, |
|
"loss": 0.617, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.16039922833442688, |
|
"rewards/margins": 0.18796458840370178, |
|
"rewards/rejected": -0.34836381673812866, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.961602898685223e-07, |
|
"logits/chosen": -0.6678417921066284, |
|
"logits/rejected": -0.6494520306587219, |
|
"logps/chosen": -8.197237014770508, |
|
"logps/rejected": -13.004777908325195, |
|
"loss": 0.6192, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.16785219311714172, |
|
"rewards/margins": 0.2895987629890442, |
|
"rewards/rejected": -0.4574509561061859, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.956320346634875e-07, |
|
"logits/chosen": -0.6635026931762695, |
|
"logits/rejected": -0.6535638570785522, |
|
"logps/chosen": -8.445914268493652, |
|
"logps/rejected": -14.642396926879883, |
|
"loss": 0.6054, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.2178197205066681, |
|
"rewards/margins": 0.33262819051742554, |
|
"rewards/rejected": -0.5504478812217712, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.95069903667256e-07, |
|
"logits/chosen": -0.6291212439537048, |
|
"logits/rejected": -0.5968618392944336, |
|
"logps/chosen": -8.441099166870117, |
|
"logps/rejected": -13.59777545928955, |
|
"loss": 0.6019, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.262218177318573, |
|
"rewards/margins": 0.2801818251609802, |
|
"rewards/rejected": -0.5424000024795532, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.944739353007341e-07, |
|
"logits/chosen": -0.6783192753791809, |
|
"logits/rejected": -0.6327847242355347, |
|
"logps/chosen": -8.718297004699707, |
|
"logps/rejected": -15.599523544311523, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.2528052031993866, |
|
"rewards/margins": 0.31637701392173767, |
|
"rewards/rejected": -0.569182276725769, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.938441702975689e-07, |
|
"logits/chosen": -0.6378843784332275, |
|
"logits/rejected": -0.6440542936325073, |
|
"logps/chosen": -9.802359580993652, |
|
"logps/rejected": -14.98701286315918, |
|
"loss": 0.5907, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.3599366247653961, |
|
"rewards/margins": 0.2988061010837555, |
|
"rewards/rejected": -0.6587426066398621, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.931806517013612e-07, |
|
"logits/chosen": -0.6049096584320068, |
|
"logits/rejected": -0.6118007302284241, |
|
"logps/chosen": -7.990042686462402, |
|
"logps/rejected": -13.457636833190918, |
|
"loss": 0.5959, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.28124743700027466, |
|
"rewards/margins": 0.39520224928855896, |
|
"rewards/rejected": -0.6764496564865112, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -1.100651502609253, |
|
"eval_logits/rejected": -1.090796947479248, |
|
"eval_logps/chosen": -9.126388549804688, |
|
"eval_logps/rejected": -11.862701416015625, |
|
"eval_loss": 0.6825469136238098, |
|
"eval_rewards/accuracies": 0.5799839496612549, |
|
"eval_rewards/chosen": -0.27851250767707825, |
|
"eval_rewards/margins": 0.10310473293066025, |
|
"eval_rewards/rejected": -0.3816172480583191, |
|
"eval_runtime": 646.4588, |
|
"eval_samples_per_second": 30.696, |
|
"eval_steps_per_second": 0.481, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 2000, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|